diff --git a/.gitignore b/.gitignore index 133c126b96ff..d21e46cb2d43 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,8 @@ result tests/data/asr .DS_Store bert.pt.json +work +fastspeech_output # Byte-compiled / optimized / DLL files __pycache__/ @@ -21,6 +23,7 @@ __pycache__/ # Distribution / packaging .idea .Python +wandb build/ develop-eggs/ dist/ @@ -152,3 +155,9 @@ examples/*/wandb examples/*/data wandb dump.py + +docs/sources/source/test_build/ + +# Checkpoints, config files and temporary files created in tutorials. +examples/neural_graphs/*.chkpt +examples/neural_graphs/*.yml \ No newline at end of file diff --git a/.lgtm.yml b/.lgtm.yml deleted file mode 100644 index c5c4f09ee07c..000000000000 --- a/.lgtm.yml +++ /dev/null @@ -1,2 +0,0 @@ -queries: - - include: py/print-during-import diff --git a/.readthedocs.yml b/.readthedocs.yml new file mode 100644 index 000000000000..40b9e2dd8492 --- /dev/null +++ b/.readthedocs.yml @@ -0,0 +1,31 @@ +# ============================================================================= +# Copyright (c) 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required field. +version: 2 + +# Build documentation in the docs/ directory with Sphinx. +sphinx: + configuration: docs/sources/source/conf.py + +# Set the version of Python and requirements required to build your docs +python: + version: 3.7 + install: + - requirements: requirements/requirements_docs.txt \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index d56bfdf4c471..d5d85e0f472b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -70,14 +70,69 @@ To release a new version, please update the changelog as followed: ## [Unreleased] ### Added +- Added NeMoModels class. Implemented in ASR collection: ASRConvCTCModel, and QuartzNet and JasperNet as its children - @okuchaiev +- Added multi-dataset data-layer and dataset. +([PR #538](https://github.com/NVIDIA/NeMo/pull/538)) - @yzhang123 +- Online Data Augmentation for ASR Collection. ([PR #565](https://github.com/NVIDIA/NeMo/pull/565)) - @titu1994 +- Speed augmentation on CPU, TimeStretch augmentation on CPU+GPU ([PR #594](https://github.com/NVIDIA/NeMo/pull/565)) - @titu1994 +- Added TarredAudioToTextDataLayer, which allows for loading ASR datasets with tarred audio. Existing datasets can be converted with the `convert_to_tarred_audio_dataset.py` script. 
([PR #602](https://github.com/NVIDIA/NeMo/pull/602)) +- Online audio augmentation notebook in ASR examples ([PR #605](https://github.com/NVIDIA/NeMo/pull/605)) - @titu1994 +- ContextNet Encoder + Decoder Initial Support ([PR #630](https://github.com/NVIDIA/NeMo/pull/630)) - @titu1994 +- Added finetuning with Megatron-LM ([PR #601](https://github.com/NVIDIA/NeMo/pull/601)) - @ekmb +- Added documentation for 8 kHz model ([PR #632](https://github.com/NVIDIA/NeMo/pull/632)) - @jbalam-nv + + +### Changed +- quartznet and jasper ASR examples reworked into speech2text.py and speech2text_infer.py - @okuchaiev +- Syncs across workers at each step to check for NaN or inf loss. Terminates all workers if stop\_on\_nan\_loss is set (as before), lets Apex deal with it if apex.amp optimization level is O1 or higher, and skips the step across workers otherwise. ([PR #637](https://github.com/NVIDIA/NeMo/pull/637)) - @redoctopus +- Updated the callback system. Old callbacks will be deprecated in version 0.12. ([PR #615](https://github.com/NVIDIA/NeMo/pull/615)) - @blisc + +### Dependencies Update + +### Deprecated + +### Fixed + +### Removed + +### Security + +### Contributors + +## [0.10.2] - 2020-05-05 + +### Added +- The Neural Graph is a high-level abstract concept empowering the users to build graphs consisting of many, interconnected Neural Modules. A user in his/her application can build any number of graphs, potentially spanning over the same modules. The import/export options combined with the lightweight API make Neural Graphs a perfect tool for rapid prototyping and experimentation. ([PR #413](https://github.com/NVIDIA/NeMo/pull/413)) - @tkornuta + +## [0.10.0] - 2020-04-03 + +### Added +- Roberta and Albert support added to GLUE script, data caching also added. +([PR #413](https://github.com/NVIDIA/NeMo/pull/413)) - @ekmb +- text classification notebook added +([PR #382](https://github.com/NVIDIA/NeMo/pull/382)) - @ericharper +- New Neural Type System documentation. Also added decorator to generate docs for input/output ports. +([PR #370](https://github.com/NVIDIA/NeMo/pull/370)) - @okuchaiev - New Neural Type System and its tests. ([PR #307](https://github.com/NVIDIA/NeMo/pull/307)) - @okuchaiev - Named tensors tuple module's output for graph construction. ([PR #268](https://github.com/NVIDIA/NeMo/pull/268)) - @stasbel - Introduced the `deprecated` decorator. ([PR #298](https://github.com/NVIDIA/NeMo/pull/298)) - @tkornuta-nvidia +- Implemented new mechanisms for importing and exporting of module configuration (init_params) to configuration (yml) +files, along with unit tests, examples and tutorials +([PR #339](https://github.com/NVIDIA/NeMo/pull/339)) - @tkornuta-nvidia +- Speech Commands support. +([PR #375](https://github.com/NVIDIA/NeMo/pull/375)) - @titu1994 ### Changed +- Refactoring of `nemo_nlp` collections: +([PR #368](https://github.com/NVIDIA/NeMo/pull/368)) - @VahidooX, @yzhang123, @ekmb + - renaming and restructuring of files, folder, and functions in `nemo_nlp` + - losses cleaned up. LossAggregatorNM moved to nemo/backends/pytorch/common/losses + ([PR #316](https://github.com/NVIDIA/NeMo/pull/316)) - @VahidooX, @yzhang123, @ekmb + - renaming and restructuring of files, folder, and functions in `nemo_nlp` + - Updated licenses - All collections changed to use New Neural Type System. 
([PR #307](https://github.com/NVIDIA/NeMo/pull/307)) - @okuchaiev - Additional Collections Repositories merged into core `nemo_toolkit` package. @@ -86,12 +141,8 @@ To release a new version, please update the changelog as followed: ([PR #284](https://github.com/NVIDIA/NeMo/pull/284)) - @stasbel - NeMo is not longer using pep8 code style rules. Code style rules are now enforced with `isort` and `black` incorporated into CI checks. ([PR #286](https://github.com/NVIDIA/NeMo/pull/286)) - @stasbel -- Major cleanup of Neural Module constructors (init), aiming at increasing the framework robustness: cleanup of NeuralModule initialization logic, refactor of trainer/actions (getting rid of local_params), fixes of several examples and unit tests, extraction and storing of intial parameters (init_params). +- Major cleanup of Neural Module constructors (init), aiming at increasing the framework robustness: cleanup of NeuralModule initialization logic, refactor of trainer/actions (getting rid of local_params), fixes of several examples and unit tests, extraction and storing of intial parameters (init_params). ([PR #309](https://github.com/NVIDIA/NeMo/pull/309)) - @tkornuta-nvidia -- Refactoring of `nemo_nlp` collections: -([PR #316](https://github.com/NVIDIA/NeMo/pull/316)) - @VahidooX, @yzhang123, @ekmb - - renaming of files and restructuring of folder in `nemo_nlp` - - Updated licenses - Updated nemo's use of the logging library. from nemo import logging is now the reccomended way of using the nemo logger. neural_factory.logger and all other instances of logger are now deprecated and planned for removal in the next version. Please see PR 267 for complete change information. ([PR #267](https://github.com/NVIDIA/NeMo/pull/267), [PR #283](https://github.com/NVIDIA/NeMo/pull/283), [PR #305](https://github.com/NVIDIA/NeMo/pull/305), [PR #311](https://github.com/NVIDIA/NeMo/pull/311)) - @blisc - Changed Distributed Data Parallel from Apex to Torch @@ -99,6 +150,10 @@ To release a new version, please update the changelog as followed: - Added TRADE (dialogue state tracking model) on MultiWOZ dataset ([PR #322](https://github.com/NVIDIA/NeMo/pull/322)) - @chiphuyen, @VahidooX +- Question answering: +([PR #390](https://github.com/NVIDIA/NeMo/pull/390)) - @yzhang123 + - Changed question answering task to use Roberta and Albert as alternative backends to Bert + - Added inference mode that does not require ground truth labels ### Dependencies Update - Added dependency on `wrapt` (the new version of the `deprecated` warning) - @tkornuta-nvidia, @DEKHTIARJonathan @@ -106,8 +161,10 @@ To release a new version, please update the changelog as followed: ### Deprecated ### Fixed -- Critical fix of the training action on CPU +- Critical fix of the training action on CPU ([PR #308](https://github.com/NVIDIA/NeMo/pull/309)) - @tkornuta-nvidia +- Fixed issue in Tacotron 2 prenet +([PR #444](https://github.com/NVIDIA/NeMo/pull/444)) - @blisc ### Removed - gradient_predivide_factor arg of train() now has no effect @@ -166,7 +223,8 @@ This release also includes nemo_asr'' and nemo_nlp'' collections for Speech Reco Please refer to the documentation here: https://nvidia.github.io/NeMo/ -[Unreleased]: https://github.com/NVIDIA/NeMo/compare/v0.9.0...master +[Unreleased]: https://github.com/NVIDIA/NeMo/compare/v0.10.0...master +[0.10.0]: https://github.com/NVIDIA/NeMo/compare/v0.9.0...v0.10.0 [0.9.0]: 
https://github.com/NVIDIA/NeMo/compare/v0.8.2...v0.9.0 [0.8.2]: https://github.com/NVIDIA/NeMo/compare/v0.8.1...v0.8.2 [0.8.1]: https://github.com/NVIDIA/NeMo/compare/r0.8...v0.8.1 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 3b3c46f5dcae..f13030fb2251 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -4,9 +4,9 @@ 2) Make sure you sign your commits. E.g. use ``git commit -s`` when commiting -3) Make sure all unittests finish successfully before sending PR +3) Make sure all unittests finish successfully before sending a PR: run ``python -m unittest`` from NeMo's root folder -4) Send your Pull Request to `master` branch +4) Send your Pull Request to the `master` branch # Collection Guidelines @@ -28,9 +28,8 @@ Please note that CI needs to pass for all the modules and collections. 1. **Sensible**: code should make sense. If you think a piece of code might be confusing, write comments. ## Python style -We follow [PEP 8 style guide](https://www.python.org/dev/peps/pep-0008/) and we incorporate [pycodestyle](https://pypi.org/project/pycodestyle/) into our CI pipeline to check for style. Make sure that your code passes PEP 8 before creating a Pull Request. - -There are several tools to automatically format your code to be PEP 8 compliant, such as [autopep8](https://github.com/hhatto/autopep8). Your text editor might support its own auto PEP 8 plugin. +We use ``black`` as our style guide. To check whether your code will pass the style check (from NeMo's repo folder) run: +``python setup.py style``, and if it does not pass, run ``python setup.py style --fix``. 1. Avoid wild import: ``from X import *`` unless in ``X.py``, ``__all__`` is defined. 1. Minimize the use of ``**kwargs``. @@ -47,7 +46,10 @@ There are several tools to automatically format your code to be PEP 8 compliant, 1. If a comment lasts multiple lines, use ``'''`` instead of ``#``. ## Nemo style -1. If you import a module from the same collection, use relative path instead of absolute path. For example, inside ``nemo_nlp``, use ``.utils`` instead of ``nemo_nelp.utils``. +1. Use absolute paths. 1. Before accessing something, always make sure that it exists. 1. Right inheritance. For example, if a module doesn't have any trainable weights, don't inherit from TrainableNM. 1. Naming consistency, both within NeMo and between NeMo and external literature. E.g. use the name ``logits`` for ``log_probs``, ``hidden_size`` for ``d_model``. +1. Make an effort to use the right Neural Types when designing your neural modules. If a type you need does not + exist, you can introduce one. See the documentation on how to do this. +1. When creating input/output ports for your modules, use the "add_port_docs" decorator to nicely generate docs for them. diff --git a/Dockerfile b/Dockerfile index d77a8fcd26a0..6010887ff8c0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -30,14 +30,23 @@ RUN apt-get update && \ python-dev && \ rm -rf /var/lib/apt/lists/* -# install onnx trt open source plugins +# install trt ENV PATH=$PATH:/usr/src/tensorrt/bin -WORKDIR /tmp/onnx-trt -COPY scripts/docker/onnx-trt.patch . -RUN git clone -n https://github.com/onnx/onnx-tensorrt.git && cd onnx-tensorrt && \ - git checkout 8716c9b && git submodule update --init --recursive && patch -f < ../onnx-trt.patch && \ - mkdir build && cd build && cmake ..
-DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr -DGPU_ARCHS="60 70 75" && \ - make -j16 && make install && mv -f /usr/lib/libnvonnx* /usr/lib/x86_64-linux-gnu/ && ldconfig && rm -rf /tmp/onnx-tensorrt +WORKDIR /tmp/trt-oss +ARG NV_REPO=https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 + +RUN cd /tmp/trt-oss +ARG DEB=libcudnn7_7.6.5.32-1+cuda10.2_amd64.deb +RUN curl -sL --output ${DEB} ${NV_REPO}/${DEB} +ARG DEB=libnvinfer7_7.0.0-1+cuda10.2_amd64.deb +RUN curl -sL --output ${DEB} ${NV_REPO}/${DEB} +ARG DEB=libnvinfer-plugin7_7.0.0-1+cuda10.2_amd64.deb +RUN curl -sL --output ${DEB} ${NV_REPO}/${DEB} +ARG DEB=libnvonnxparsers7_7.0.0-1+cuda10.2_amd64.deb +RUN curl -sL --output ${DEB} ${NV_REPO}/${DEB} +ARG DEB=python-libnvinfer_7.0.0-1+cuda10.2_amd64.deb +RUN curl -sL --output ${DEB} ${NV_REPO}/${DEB} +RUN dpkg -i *.deb && cd ../.. && rm -rf /tmp/trt-oss # install nemo dependencies WORKDIR /tmp/nemo @@ -52,6 +61,7 @@ COPY . . FROM nemo-deps as nemo ARG NEMO_VERSION ARG BASE_IMAGE + # Check that NEMO_VERSION is set. Build will fail without this. Expose NEMO and base container # version information as runtime environment variable for introspection purposes RUN /usr/bin/test -n "$NEMO_VERSION" && \ diff --git a/Jenkinsfile b/Jenkinsfile index c37479f37ed8..60d6961824a6 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -2,7 +2,7 @@ pipeline { agent { docker { image 'nvcr.io/nvidia/pytorch:20.01-py3' - args '--device=/dev/nvidia0 --gpus all --user 0:128 -v /home:/home --shm-size=8g' + args '--device=/dev/nvidia0 --gpus all --user 0:128 -v /home/TestData:/home/TestData -v $HOME/.cache/torch:/root/.cache/torch --shm-size=8g' } } options { @@ -11,28 +11,83 @@ pipeline { } stages { - stage('L0: PyTorch version') { + stage('PyTorch version') { steps { sh 'python -c "import torch; print(torch.__version__)"' } } - stage('L0: Install test requirements') { + stage('Install test requirements') { steps { sh 'apt-get update && apt-get install -y bc && pip install -r requirements/requirements_test.txt' } } - stage('L0: Code formatting checks') { + stage('Code formatting checks') { steps { sh 'python setup.py style' } } - stage('L0: Unittests ALL') { + stage('Documentation check') { steps { - sh './reinstall.sh && python -m unittest' + sh './reinstall.sh && pytest -m docs' } } - stage('L1: Parallel Stage1') { + + stage('L0: Unit Tests GPU') { + steps { + sh 'pytest -m "unit and not skipduringci"' + } + } + + stage('L0: Unit Tests CPU') { + when { + anyOf{ + branch 'master' + changeRequest target: 'master' + } + } + steps { + sh 'pytest -m unit --cpu' + } + } + + stage('L0: Integration Tests GPU') { + steps { + sh 'pytest -s -m "integration and not skipduringci"' + } + } + + stage('L0: Integration Tests CPU') { + when { + anyOf{ + branch 'master' + changeRequest target: 'master' + } + } + steps { + sh 'pytest -s -m integration --cpu' + } + } + + stage('L1: System Tests GPU') { + steps { + sh 'pytest -m "system and not skipduringci"' + } + } + + stage('L1: System Tests CPU') { + when { + anyOf{ + branch 'master' + changeRequest target: 'master' + } + } + steps { + sh 'pytest -m system --cpu' + } + } + + stage('L2: Parallel Stage1 GPU') { when { anyOf{ branch 'master' @@ -54,7 +109,7 @@ pipeline { } } - stage('L1: Parallel NLP-BERT pretraining') { + stage('L2: Parallel NLP-BERT pretraining') { when { anyOf{ branch 'master' @@ -62,17 +117,17 @@ pipeline { } } failFast true - parallel { + parallel { stage('BERT on the fly preprocessing') { steps { - sh 'cd 
examples/nlp/language_modeling && CUDA_VISIBLE_DEVICES=0 python bert_pretraining.py --amp_opt_level O1 --data_dir /home/TestData/nlp/wikitext-2 --dataset_name wikitext-2 --work_dir outputs/bert_lm/wikitext2 --batch_size 64 --lr 0.01 --lr_policy CosineAnnealing --lr_warmup_proportion 0.05 --tokenizer sentence-piece --vocab_size 3200 --hidden_size 768 --intermediate_size 3072 --num_hidden_layers 6 --num_attention_heads 12 --hidden_act "gelu" --save_step_freq 200 --sample_size 10000000 --mask_probability 0.15 --short_seq_prob 0.1 --max_steps=300' + sh 'cd examples/nlp/language_modeling && CUDA_VISIBLE_DEVICES=0 python bert_pretraining.py --amp_opt_level O1 --train_data /home/TestData/nlp/wikitext-2/train.txt --eval_data /home/TestData/nlp/wikitext-2/valid.txt --work_dir outputs/bert_lm/wikitext2 --batch_size 64 --lr 0.01 --lr_policy CosineAnnealing --lr_warmup_proportion 0.05 --vocab_size 3200 --hidden_size 768 --intermediate_size 3072 --num_hidden_layers 6 --num_attention_heads 12 --hidden_act "gelu" --save_step_freq 200 data_text --num_iters=300 --tokenizer sentence-piece --sample_size 10000000 --mask_probability 0.15 --short_seq_prob 0.1 --dataset_name wikitext-2' sh 'cd examples/nlp/language_modeling && LOSS=$(cat outputs/bert_lm/wikitext2/log_globalrank-0_localrank-0.txt | grep "Loss" |tail -n 1| awk \'{print \$7}\' | egrep -o "[0-9.]+" ) && echo $LOSS && if [ $(echo "$LOSS < 8.0" | bc -l) -eq 1 ]; then echo "SUCCESS" && exit 0; else echo "FAILURE" && exit 1; fi' - sh 'rm -rf examples/nlp/language_modeling/outputs/wikitext2' + sh 'rm -rf examples/nlp/language_modeling/outputs/wikitext2 && rm -rf /home/TestData/nlp/wikitext-2/*.pkl && rm -rf /home/TestData/nlp/wikitext-2/bert' } - } + } stage('BERT offline preprocessing') { steps { - sh 'cd examples/nlp/language_modeling && CUDA_VISIBLE_DEVICES=1 python bert_pretraining.py --amp_opt_level O1 --data_dir /home/TestData/nlp/wiki_book_mini --work_dir outputs/bert_lm/wiki_book --batch_size 8 --config_file /home/TestData/nlp/bert_configs/uncased_L-12_H-768_A-12.json --save_step_freq 200 --max_steps 300 --num_gpus 1 --batches_per_step 1 --lr_policy SquareRootAnnealing --beta2 0.999 --beta1 0.9 --lr_warmup_proportion 0.01 --optimizer adam_w --weight_decay 0.01 --lr 0.875e-4 --preprocessed_data ' + sh 'cd examples/nlp/language_modeling && CUDA_VISIBLE_DEVICES=1 python bert_pretraining.py --amp_opt_level O1 --train_data /home/TestData/nlp/wiki_book_mini/training --eval_data /home/TestData/nlp/wiki_book_mini/evaluation --work_dir outputs/bert_lm/wiki_book --batch_size 8 --config_file /home/TestData/nlp/bert_configs/uncased_L-12_H-768_A-12.json --save_step_freq 200 --num_gpus 1 --batches_per_step 1 --lr_policy SquareRootAnnealing --beta2 0.999 --beta1 0.9 --lr_warmup_proportion 0.01 --optimizer adam_w --weight_decay 0.01 --lr 0.875e-4 data_preprocessed --num_iters 300' sh 'cd examples/nlp/language_modeling && LOSS=$(cat outputs/bert_lm/wiki_book/log_globalrank-0_localrank-0.txt | grep "Loss" |tail -n 1| awk \'{print \$7}\' | egrep -o "[0-9.]+" ) && echo $LOSS && if [ $(echo "$LOSS < 15.0" | bc -l) -eq 1 ]; then echo "SUCCESS" && exit 0; else echo "FAILURE" && exit 1; fi' sh 'rm -rf examples/nlp/language_modeling/outputs/wiki_book' } @@ -80,7 +135,7 @@ pipeline { } } - stage('L1: Parallel NLP Examples 1') { + stage('L2: Parallel NLP Examples 1') { when { anyOf{ branch 'master' @@ -91,19 +146,21 @@ pipeline { parallel { stage ('Text Classification with BERT Test') { steps { - sh 'cd examples/nlp/text_classification && CUDA_VISIBLE_DEVICES=0 python 
text_classification_with_bert.py --num_epochs=1 --max_seq_length=50 --dataset_name=jarvis --data_dir=/home/TestData/nlp/retail/ --eval_file_prefix=eval --batch_size=10 --num_train_samples=-1 --do_lower_case --shuffle_data --work_dir=outputs' + sh 'cd examples/nlp/text_classification && CUDA_VISIBLE_DEVICES=0 python text_classification_with_bert.py --pretrained_model_name bert-base-uncased --num_epochs=1 --max_seq_length=50 --data_dir=/home/TestData/nlp/retail/ --eval_file_prefix=dev --batch_size=10 --num_train_samples=-1 --do_lower_case --work_dir=outputs' sh 'rm -rf examples/nlp/text_classification/outputs' } } stage ('Dialogue State Tracking - TRADE - Multi-GPUs') { steps { - sh 'cd examples/nlp/dialogue_state_tracking && CUDA_VISIBLE_DEVICES=0,1 python -m torch.distributed.launch --nproc_per_node=2 dialogue_state_tracking_trade.py --batch_size=10 --eval_batch_size=10 --num_train_samples=-1 --num_eval_samples=-1 --num_epochs=1 --dropout=0.2 --eval_file_prefix=test --shuffle_data --num_gpus=2 --lr=0.001 --grad_norm_clip=10 --work_dir=outputs --data_dir=/home/TestData/nlp/multiwoz2.1' + sh 'rm -rf /home/TestData/nlp/multiwoz2.1/vocab.pkl' + sh 'cd examples/nlp/dialogue_state_tracking && CUDA_VISIBLE_DEVICES=0,1 python -m torch.distributed.launch --nproc_per_node=2 dialogue_state_tracking_trade.py --batch_size=10 --eval_batch_size=10 --num_train_samples=-1 --num_eval_samples=-1 --num_epochs=1 --dropout=0.2 --eval_file_prefix=test --num_gpus=2 --lr=0.001 --grad_norm_clip=10 --work_dir=outputs --data_dir=/home/TestData/nlp/multiwoz2.1' sh 'rm -rf examples/nlp/dialogue_state_tracking/outputs' + sh 'rm -rf /home/TestData/nlp/multiwoz2.1/vocab.pkl' } } stage ('GLUE Benchmark Test') { steps { - sh 'cd examples/nlp/glue_benchmark && CUDA_VISIBLE_DEVICES=1 python glue_benchmark_with_bert.py --data_dir /home/TestData/nlp/glue_fake/MRPC --work_dir glue_output --save_step_freq -1 --num_epochs 1 --task_name mrpc --batch_size 2' + sh 'cd examples/nlp/glue_benchmark && CUDA_VISIBLE_DEVICES=1 python glue_benchmark_with_bert.py --data_dir /home/TestData/nlp/glue_fake/MRPC --pretrained_model_name bert-base-uncased --work_dir glue_output --save_step_freq -1 --num_epochs 1 --task_name mrpc --batch_size 2 --no_data_cache' sh 'rm -rf examples/nlp/glue_benchmark/glue_output' } } @@ -111,7 +168,7 @@ pipeline { } - stage('L1: Parallel NLP Examples 2') { + stage('L2: Parallel NLP Examples 2') { when { anyOf{ branch 'master' @@ -122,22 +179,40 @@ pipeline { parallel { stage('Token Classification Training/Inference Test') { steps { - sh 'cd examples/nlp/token_classification && CUDA_VISIBLE_DEVICES=0 python token_classification.py --data_dir /home/TestData/nlp/token_classification_punctuation/ --batch_size 2 --num_epochs 1 --save_epoch_freq 1 --work_dir token_classification_output --pretrained_bert_model bert-base-cased' - sh 'cd examples/nlp/token_classification && DATE_F=$(ls token_classification_output/) && CUDA_VISIBLE_DEVICES=0 python token_classification_infer.py --work_dir token_classification_output/$DATE_F/checkpoints/ --labels_dict /home/TestData/nlp/token_classification_punctuation/label_ids.csv --pretrained_bert_model bert-base-cased' + sh 'cd examples/nlp/token_classification && CUDA_VISIBLE_DEVICES=0 python token_classification.py --data_dir /home/TestData/nlp/token_classification_punctuation/ --batch_size 2 --num_epochs 1 --save_epoch_freq 1 --work_dir token_classification_output --pretrained_model_name bert-base-uncased' + sh 'cd examples/nlp/token_classification && DATE_F=$(ls 
token_classification_output/) && CUDA_VISIBLE_DEVICES=0 python token_classification_infer.py --checkpoint_dir token_classification_output/$DATE_F/checkpoints/ --labels_dict /home/TestData/nlp/token_classification_punctuation/label_ids.csv --pretrained_model_name bert-base-uncased' sh 'rm -rf examples/nlp/token_classification/token_classification_output' } } + stage('Megatron finetuning Token Classification Training/Inference Test') { + steps { + sh 'cd examples/nlp/token_classification && CUDA_VISIBLE_DEVICES=0 python token_classification.py --data_dir /home/TestData/nlp/token_classification_punctuation/ --batch_size 2 --num_epochs 1 --save_epoch_freq 1 --work_dir megatron_output --pretrained_model_name megatron-bert-345m-uncased' + sh 'cd examples/nlp/token_classification && DATE_F=$(ls megatron_output/) && CUDA_VISIBLE_DEVICES=0 python token_classification_infer.py --checkpoint_dir megatron_output/$DATE_F/checkpoints/ --labels_dict /home/TestData/nlp/token_classification_punctuation/label_ids.csv --pretrained_model_name megatron-bert-345m-uncased' + sh 'rm -rf examples/nlp/token_classification/megatron_output' + } + } stage ('Punctuation and Classification Training/Inference Test') { steps { - sh 'cd examples/nlp/token_classification && CUDA_VISIBLE_DEVICES=1 python punctuation_capitalization.py --data_dir /home/TestData/nlp/token_classification_punctuation/ --work_dir punctuation_output --save_epoch_freq 1 --num_epochs 1 --save_step_freq -1 --batch_size 2' - sh 'cd examples/nlp/token_classification && DATE_F=$(ls punctuation_output/) && DATA_DIR="/home/TestData/nlp/token_classification_punctuation" && CUDA_VISIBLE_DEVICES=1 python punctuation_capitalization_infer.py --checkpoints_dir punctuation_output/$DATE_F/checkpoints/ --punct_labels_dict $DATA_DIR/punct_label_ids.csv --capit_labels_dict $DATA_DIR/capit_label_ids.csv' + sh 'cd examples/nlp/token_classification && CUDA_VISIBLE_DEVICES=1 python punctuation_capitalization.py \ + --data_dir /home/TestData/nlp/token_classification_punctuation/ --work_dir punctuation_output --save_epoch_freq 1 \ + --num_epochs 1 --save_step_freq -1 --batch_size 2' + sh 'cd examples/nlp/token_classification && DATE_F=$(ls punctuation_output/) && DATA_DIR="/home/TestData/nlp/token_classification_punctuation" && CUDA_VISIBLE_DEVICES=1 python punctuation_capitalization_infer.py --checkpoint_dir punctuation_output/$DATE_F/checkpoints/ --punct_labels_dict $DATA_DIR/punct_label_ids.csv --capit_labels_dict $DATA_DIR/capit_label_ids.csv' sh 'rm -rf examples/nlp/token_classification/punctuation_output' } } + stage('SGD Test') { + steps { + sh 'cd examples/nlp/dialogue_state_tracking && CUDA_VISIBLE_DEVICES=0 python dialogue_state_tracking_sgd.py \ + --data_dir /home/TestData/nlp/sgd/ --schema_embedding_dir /home/TestData/nlp/sgd/embeddings/ --eval_dataset dev \ + --dialogues_example_dir /home/TestData/nlp/sgd/dialogue_example_dir/ --work_dir sgd_output --task debug_sample \ + --num_epochs 1 --save_epoch_freq=0 --no_overwrite_schema_emb_files --no_overwrite_dial_files' + sh 'rm -rf examples/nlp/dialogue_state_tracking/sgd_output' + } + } } } - stage('L1: Parallel NLP-Squad') { + stage('L2: Parallel NLP-Squad') { when { anyOf{ branch 'master' @@ -148,22 +223,22 @@ pipeline { parallel { stage('BERT Squad v1.1') { steps { - sh 'cd examples/nlp/question_answering && CUDA_VISIBLE_DEVICES=0 python question_answering_squad.py --amp_opt_level O1 --train_file /home/TestData/nlp/squad_mini/v1.1/train-v1.1.json --dev_file /home/TestData/nlp/squad_mini/v1.1/dev-v1.1.json 
--work_dir outputs/squadv1 --batch_size 8 --save_step_freq 300 --num_epochs 3 --lr_policy WarmupAnnealing --lr 3e-5 --do_lower_case' - sh 'cd examples/nlp/question_answering && FSCORE=$(cat outputs/squadv1/log_globalrank-0_localrank-0.txt | grep "f1" |tail -n 1 |egrep -o "[0-9.]+"|tail -n 1 ) && echo $FSCORE && if [ $(echo "$FSCORE > 50.0" | bc -l) -eq 1 ]; then echo "SUCCESS" && exit 0; else echo "FAILURE" && exit 1; fi' + sh 'cd examples/nlp/question_answering && CUDA_VISIBLE_DEVICES=0 python question_answering_squad.py --no_data_cache --amp_opt_level O1 --train_file /home/TestData/nlp/squad_mini/v1.1/train-v1.1.json --eval_file /home/TestData/nlp/squad_mini/v1.1/dev-v1.1.json --work_dir outputs/squadv1 --batch_size 8 --save_step_freq 200 --max_steps 50 --train_step_freq 5 --lr_policy WarmupAnnealing --lr 5e-5 --do_lower_case --pretrained_model_name bert-base-uncased --optimizer adam_w' + sh 'cd examples/nlp/question_answering && FSCORE=$(cat outputs/squadv1/log_globalrank-0_localrank-0.txt | grep "f1" |tail -n 1 |egrep -o "[0-9.]+"|tail -n 1 ) && echo $FSCORE && if [ $(echo "$FSCORE > 10.0" | bc -l) -eq 1 ]; then echo "SUCCESS" && exit 0; else echo "FAILURE" && exit 1; fi' sh 'rm -rf examples/nlp/question_answering/outputs/squadv1 && rm -rf /home/TestData/nlp/squad_mini/v1.1/*cache*' } } stage('BERT Squad v2.0') { steps { - sh 'cd examples/nlp/question_answering && CUDA_VISIBLE_DEVICES=1 python question_answering_squad.py --amp_opt_level O1 --train_file /home/TestData/nlp/squad_mini/v2.0/train-v2.0.json --dev_file /home/TestData/nlp/squad_mini/v2.0/dev-v2.0.json --work_dir outputs/squadv2 --batch_size 8 --save_step_freq 300 --num_epochs 3 --lr_policy WarmupAnnealing --lr 3e-5 --do_lower_case --version_2_with_negative' - sh 'cd examples/nlp/question_answering && FSCORE=$(cat outputs/squadv2/log_globalrank-0_localrank-0.txt | grep "f1" |tail -n 1 |egrep -o "[0-9.]+"|tail -n 1 ) && echo $FSCORE && if [ $(echo "$FSCORE > 50.0" | bc -l) -eq 1 ]; then echo "SUCCESS" && exit 0; else echo "FAILURE" && exit 1; fi' + sh 'cd examples/nlp/question_answering && CUDA_VISIBLE_DEVICES=1 python question_answering_squad.py --no_data_cache --amp_opt_level O1 --train_file /home/TestData/nlp/squad_mini/v2.0/train-v2.0.json --eval_file /home/TestData/nlp/squad_mini/v2.0/dev-v2.0.json --work_dir outputs/squadv2 --batch_size 8 --save_step_freq 200 --train_step_freq 2 --max_steps 10 --lr_policy WarmupAnnealing --lr 1e-5 --do_lower_case --version_2_with_negative --pretrained_model_name bert-base-uncased --optimizer adam_w' + sh 'cd examples/nlp/question_answering && FSCORE=$(cat outputs/squadv2/log_globalrank-0_localrank-0.txt | grep "f1" |tail -n 1 |egrep -o "[0-9.]+"|tail -n 1 ) && echo $FSCORE && if [ $(echo "$FSCORE > 40.0" | bc -l) -eq 1 ]; then echo "SUCCESS" && exit 0; else echo "FAILURE" && exit 1; fi' sh 'rm -rf examples/nlp/question_answering/outputs/squadv2 && rm -rf /home/TestData/nlp/squad_mini/v2.0/*cache*' } } } } - stage('L1: Parallel NLP-Examples 3') { + stage('L2: Parallel NLP-Examples 3') { when { anyOf{ branch 'master' @@ -171,7 +246,7 @@ pipeline { } } failFast true - parallel { + parallel { stage('asr_processing') { steps { sh 'cd examples/nlp/asr_postprocessor && CUDA_VISIBLE_DEVICES=0 python asr_postprocessor.py --data_dir=/home/TestData/nlp/asr_postprocessor/pred_real --restore_from=/home/TestData/nlp/asr_postprocessor/bert-base-uncased_decoder.pt --max_steps=25 --batch_size=64' @@ -181,15 +256,15 @@ pipeline { } stage('Roberta Squad v1.1') { steps { - sh 'cd 
examples/nlp/question_answering && CUDA_VISIBLE_DEVICES=1 python question_answering_squad.py --amp_opt_level O1 --train_file /home/TestData/nlp/squad_mini/v1.1/train-v1.1.json --dev_file /home/TestData/nlp/squad_mini/v1.1/dev-v1.1.json --work_dir outputs/squadv1_roberta --batch_size 2 --save_step_freq 500 --num_epochs 1 --lr_policy WarmupAnnealing --lr 3e-5 --do_lower_case --model_type roberta --pretrained_model_name roberta-base' - sh 'cd examples/nlp/question_answering && FSCORE=$(cat outputs/squadv1_roberta/log_globalrank-0_localrank-0.txt | grep "f1" |tail -n 1 |egrep -o "[0-9.]+"|tail -n 1 ) && echo $FSCORE && if [ $(echo "$FSCORE > 50.0" | bc -l) -eq 1 ]; then echo "SUCCESS" && exit 0; else echo "FAILURE" && exit 1; fi' + sh 'cd examples/nlp/question_answering && CUDA_VISIBLE_DEVICES=1 python question_answering_squad.py --no_data_cache --amp_opt_level O1 --train_file /home/TestData/nlp/squad_mini/v1.1/train-v1.1.json --eval_file /home/TestData/nlp/squad_mini/v1.1/dev-v1.1.json --work_dir outputs/squadv1_roberta --batch_size 5 --save_step_freq 200 --max_steps 50 --train_step_freq 5 --lr_policy WarmupAnnealing --lr 1e-5 --pretrained_model_name roberta-base --optimizer adam_w' + sh 'cd examples/nlp/question_answering && FSCORE=$(cat outputs/squadv1_roberta/log_globalrank-0_localrank-0.txt | grep "f1" |tail -n 1 |egrep -o "[0-9.]+"|tail -n 1 ) && echo $FSCORE && if [ $(echo "$FSCORE > 7.0" | bc -l) -eq 1 ]; then echo "SUCCESS" && exit 0; else echo "FAILURE" && exit 1; fi' sh 'rm -rf examples/nlp/question_answering/outputs/squadv1_roberta && rm -rf /home/TestData/nlp/squad_mini/v1.1/*cache*' } } } } - stage('L1: NLP-Intent Detection/SLot Tagging Examples - Multi-GPU') { + stage('L2: NLP-Intent Detection/Slot Tagging Examples - Multi-GPU') { when { anyOf{ branch 'master' @@ -198,14 +273,14 @@ pipeline { } failFast true steps { - sh 'cd examples/nlp/intent_detection_slot_tagging && CUDA_VISIBLE_DEVICES=0,1 python -m torch.distributed.launch --nproc_per_node=2 joint_intent_slot_with_bert.py --num_gpus=2 --num_epochs=1 --max_seq_length=50 --dataset_name=jarvis-retail --data_dir=/home/TestData/nlp/retail/ --eval_file_prefix=eval --batch_size=10 --num_train_samples=-1 --do_lower_case --shuffle_data --work_dir=outputs' - sh 'cd examples/nlp/intent_detection_slot_tagging && TASK_NAME=$(ls outputs/) && DATE_F=$(ls outputs/$TASK_NAME/) && CHECKPOINT_DIR=outputs/$TASK_NAME/$DATE_F/checkpoints/ && CUDA_VISIBLE_DEVICES=0 python joint_intent_slot_infer.py --work_dir $CHECKPOINT_DIR --eval_file_prefix=eval --dataset_name=jarvis-retail --data_dir=/home/TestData/nlp/retail/ --batch_size=10' - sh 'cd examples/nlp/intent_detection_slot_tagging && TASK_NAME=$(ls outputs/) && DATE_F=$(ls outputs/$TASK_NAME/) && CHECKPOINT_DIR=outputs/$TASK_NAME/$DATE_F/checkpoints/ && CUDA_VISIBLE_DEVICES=0 python joint_intent_slot_infer_b1.py --data_dir=/home/TestData/nlp/retail/ --work_dir $CHECKPOINT_DIR --dataset_name=jarvis-retail --query="how much is it?"' + sh 'cd examples/nlp/intent_detection_slot_tagging && CUDA_VISIBLE_DEVICES=0,1 python -m torch.distributed.launch --nproc_per_node=2 joint_intent_slot_with_bert.py --num_gpus=2 --pretrained_model_name=bert-base-uncased --num_epochs=1 --max_seq_length=50 --data_dir=/home/TestData/nlp/retail/ --eval_file_prefix=dev --batch_size=10 --num_train_samples=-1 --do_lower_case --work_dir=outputs_joint_intent_slot' + sh 'cd examples/nlp/intent_detection_slot_tagging && DATE_F=$(ls outputs_joint_intent_slot/) && CHECKPOINT_DIR=outputs_joint_intent_slot/$DATE_F/checkpoints/ && 
CUDA_VISIBLE_DEVICES=0 python joint_intent_slot_infer.py --checkpoint_dir $CHECKPOINT_DIR --pretrained_model_name=bert-base-uncased --eval_file_prefix=dev --data_dir=/home/TestData/nlp/retail/ --batch_size=10' + sh 'cd examples/nlp/intent_detection_slot_tagging && DATE_F=$(ls outputs_joint_intent_slot/) && CHECKPOINT_DIR=outputs_joint_intent_slot/$DATE_F/checkpoints/ && CUDA_VISIBLE_DEVICES=0 python joint_intent_slot_infer_b1.py --data_dir=/home/TestData/nlp/retail/ --pretrained_model_name=bert-base-uncased --checkpoint_dir $CHECKPOINT_DIR --query="how much is it?"' sh 'rm -rf examples/nlp/intent_detection_slot_tagging/outputs' } } - stage('L1: NLP-NMT Example') { + stage('L2: NLP-NMT Example') { when { anyOf{ branch 'master' @@ -215,11 +290,34 @@ pipeline { failFast true steps { sh 'cd examples/nlp/neural_machine_translation/ && CUDA_VISIBLE_DEVICES=0 python machine_translation_tutorial.py --max_steps 100' - sh 'rm -rf examples/nlp/neural_machine_translation/outputs' + sh 'rm -rf examples/nlp/neural_machine_translation/outputs' + } + } + + stage('L2: Parallel Stage QuartzNet/JasperNet inference') { + when { + anyOf{ + branch 'master' + changeRequest() + } + } + failFast true + parallel { + stage('QuartzNet inference') { + steps { + sh 'cd examples/asr && CUDA_VISIBLE_DEVICES=0 python speech2text_infer.py --asr_model=QuartzNet15x5-En --dataset=/home/TestData/librispeech/librivox-dev-other.json --wer_target=0.1060' + } + } + stage('JasperNet inference') { + steps { + sh 'cd examples/asr && CUDA_VISIBLE_DEVICES=1 python speech2text_infer.py --asr_model=JasperNet10x5-En --dataset=/home/TestData/librispeech/librivox-dev-other.json --wer_target=0.1041' + } + } } } - stage('L1: Parallel Stage Jasper / GAN') { + + stage('L2: Parallel Stage Jasper / GAN') { when { anyOf{ branch 'master' @@ -240,7 +338,7 @@ pipeline { } stage('Jasper AN4 O2') { steps { - sh 'cd examples/asr && CUDA_VISIBLE_DEVICES=1 python jasper_an4.py --amp_opt_level=O2 --num_epochs=35 --test_after_training --work_dir=O2 --train_dataset=/home/TestData/an4_dataset/an4_train.json --eval_datasets=/home/TestData/an4_dataset/an4_val.json' + sh 'cd examples/asr && CUDA_VISIBLE_DEVICES=1 python jasper_an4.py --amp_opt_level=O2 --num_epochs=35 --test_after_training --work_dir=O2 --train_dataset=/home/TestData/an4_dataset/an4_train.json --eval_datasets=/home/TestData/an4_dataset/an4_val.json --do_not_eval_at_start --eval_freq 1000' } } } @@ -257,25 +355,7 @@ pipeline { // } // } - stage('L1: Multi-GPU Jasper test') { - when { - anyOf{ - branch 'master' - changeRequest() - } - } - failFast true - parallel { - stage('Jasper AN4 2 GPUs') { - steps { - sh 'cd examples/asr && CUDA_VISIBLE_DEVICES=0,1 python -m torch.distributed.launch --nproc_per_node=2 jasper_an4.py --num_epochs=40 --batch_size=24 --work_dir=multi_gpu --test_after_training --train_dataset=/home/TestData/an4_dataset/an4_train.json --eval_datasets=/home/TestData/an4_dataset/an4_val.json' - } - } - } - } - - - stage('L1: TTS Tests') { + stage('L2: Multi-GPU Jasper test') { when { anyOf{ branch 'master' @@ -284,18 +364,33 @@ pipeline { } failFast true steps { - sh 'cd examples/tts && CUDA_VISIBLE_DEVICES=0,1 python -m torch.distributed.launch --nproc_per_node=2 tacotron2.py --max_steps=51 --model_config=configs/tacotron2.yaml --train_dataset=/home/TestData/an4_dataset/an4_train.json --amp_opt_level=O1 --eval_freq=50' - sh 'cd examples/tts && TTS_CHECKPOINT_DIR=$(ls | grep "Tacotron2") && echo $TTS_CHECKPOINT_DIR && LOSS=$(cat $TTS_CHECKPOINT_DIR/log_globalrank-0_localrank-0.txt | 
grep -o -E "Loss[ :0-9.]+" | grep -o -E "[0-9.]+" | tail -n 1) && echo $LOSS && if [ $(echo "$LOSS < 3.0" | bc -l) -eq 1 ]; then echo "SUCCESS" && exit 0; else echo "FAILURE" && exit 1; fi' - // sh 'cd examples/tts && TTS_CHECKPOINT_DIR=$(ls | grep "Tacotron2") && cp ../asr/multi_gpu/checkpoints/* $TTS_CHECKPOINT_DIR/checkpoints' - // sh 'CUDA_VISIBLE_DEVICES=0 python tacotron2_an4_test.py --model_config=configs/tacotron2.yaml --eval_dataset=/home/TestData/an4_dataset/an4_train.json --jasper_model_config=../asr/configs/jasper_an4.yaml --load_dir=$TTS_CHECKPOINT_DIR/checkpoints' + sh 'cd examples/asr && CUDA_VISIBLE_DEVICES=0,1 python -m torch.distributed.launch --nproc_per_node=2 jasper_an4.py --num_epochs=40 --batch_size=24 --work_dir=multi_gpu --test_after_training --train_dataset=/home/TestData/an4_dataset/an4_train.json --eval_datasets=/home/TestData/an4_dataset/an4_val.json --do_not_eval_at_start --eval_freq 1000' } } + + // stage('L2: TTS Tests') { + // when { + // anyOf{ + // branch 'master' + // changeRequest() + // } + // } + // failFast true + // steps { + // sh 'cd examples/tts && CUDA_VISIBLE_DEVICES=0,1 python -m torch.distributed.launch --nproc_per_node=2 tacotron2.py --num_epochs=4 --model_config=configs/tacotron2.yaml --train_dataset=/home/TestData/an4_dataset/an4_train.json --amp_opt_level=O1 --eval_datasets=/home/TestData/an4_dataset/an4_val.json --eval_freq=100 --do_not_eval_at_start --decoder_force --eval_batch_size=48 --random_seed=0' + // sh 'cd examples/tts && TTS_CHECKPOINT_DIR=$(ls | grep "Tacotron2") && echo $TTS_CHECKPOINT_DIR && LOSS=$(cat $TTS_CHECKPOINT_DIR/log_globalrank-0_localrank-0.txt | grep -o -E "Loss an4_val[ :0-9.]+" | grep -o -E "[0-9.]+" | tail -n 1) && echo $LOSS && if [ $(echo "$LOSS - 4.344909191131592 < 0.1" | bc -l) -eq 1 ]; then echo "SUCCESS" && exit 0; else echo "FAILURE" && exit 1; fi' + // // sh 'cd examples/tts && TTS_CHECKPOINT_DIR=$(ls | grep "Tacotron2") && cp ../asr/multi_gpu/checkpoints/* $TTS_CHECKPOINT_DIR/checkpoints' + // // sh 'CUDA_VISIBLE_DEVICES=0 python tacotron2_an4_test.py --model_config=configs/tacotron2.yaml --eval_dataset=/home/TestData/an4_dataset/an4_train.json --jasper_model_config=../asr/configs/jasper_an4.yaml --load_dir=$TTS_CHECKPOINT_DIR/checkpoints' + // } + // } + } post { always { + sh "chmod -R 777 ." cleanWs() } } -} +} \ No newline at end of file diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 000000000000..266820b33ad8 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,9 @@ +recursive-include nemo/ *.py +include *.py +include *.md +include LICENSE +include nemo/README.md +exclude README.rst +exclude setup.cfg +exclude MANIFEST.in +exclude Dockerfile diff --git a/README.rst b/README.rst index 3b5147719ee8..e6691245d408 100644 --- a/README.rst +++ b/README.rst @@ -24,63 +24,75 @@ -NVIDIA Neural Modules: NeMo -=========================== +NVIDIA NeMo +=========== -NeMo (Neural Modules) is a toolkit for creating AI applications using **neural modules** - conceptual blocks of neural networks that take *typed* inputs and produce *typed* outputs. Such modules typically represent data layers, encoders, decoders, language models, loss functions, or methods of combining activations. +NeMo is a toolkit for creating `Conversational AI `_ applications. -NeMo makes it easy to combine and re-use these building blocks while providing a level of semantic correctness checking via its neural type system. As long as two modules have compatible inputs and outputs, it is legal to chain them together. 
+NeMo toolkit makes it possible for researchers to easily compose complex neural network architectures for conversational AI using reusable components - Neural Modules. +**Neural Modules** are conceptual blocks of neural networks that take *typed* inputs and produce *typed* outputs. Such modules typically represent data layers, encoders, decoders, language models, loss functions, or methods of combining activations. -NeMo's API is designed to be **framework-agnostic**, but currently only PyTorch is supported. +The toolkit comes with extendable collections of pre-built modules for automatic speech recognition (ASR), natural language processing (NLP) and text synthesis (TTS). -The toolkit comes with extendable collections of pre-built modules for automatic speech recognition (ASR), natural language processing (NLP) and text synthesis (TTS). Furthermore, NeMo provides built-in support for **distributed training** and **mixed precision** on the latest NVIDIA GPUs. +Built for speed, NeMo can utilize NVIDIA's Tensor Cores and scale out training to multiple GPUs and multiple nodes. NeMo has integration with NVIDIA Jarvis. -NeMo consists of: +**Introduction** -* **NeMo Core**: fundamental building blocks for all neural models and type system. -* **NeMo collections**: pre-built neural modules for particular domains such as automatic speech recognition (nemo_asr), natural language processing (nemo_nlp) and text synthesis (nemo_tts). +* Watch `this video `_ for a quick walk-through. +* `Documentation (latest released version) `_ and `Documentation (master branch) `_ -**Introduction** +* Read NVIDIA `Developer Blog to learn how to develop speech recognition models for different languages `_ -See `this video `_ for a quick walk-through. +* Read NVIDIA `Developer Blog announcing NeMo `_ -**Requirements** +* Read NVIDIA `Developer Blog for example applications `_ -1) Python 3.6 or 3.7 -2) PyTorch 1.2.* or 1.3.* with GPU support -3) (optional for best performance) NVIDIA APEX. Install from here: https://github.com/NVIDIA/apex +* Read NVIDIA `Developer Blog for QuartzNet ASR model `_ -**Getting started** +* Recommended version to install is **0.10.1** via ``pip install nemo-toolkit[all]`` -THE LATEST STABLE VERSION OF NeMo is **0.9.0** (which is available via PIP). +* Recommended NVIDIA `NGC NeMo Toolkit container `_ -**Docker Container** - NVIDIA `NGC NeMo Toolkit container `_ is now available. +* Pretrained models are available on NVIDIA `NGC Model repository `_ -* Pull the docker: ``docker pull nvcr.io/nvidia/nemo:v0.9`` -* Run: ``docker run --runtime=nvidia -it --rm -v :/NeMo --shm-size=8g -p 8888:8888 -p 6006:6006 --ulimit memlock=-1 --ulimit stack=67108864 nvcr.io/nvidia/nemo:v0.9`` -If you are using the NVIDIA `NGC PyTorch container `_ follow these instructions +Getting started +~~~~~~~~~~~~~~~ -* Pull the docker: ``docker pull nvcr.io/nvidia/pytorch:19.11-py3`` -* Run: ``docker run --runtime=nvidia -it --rm -v :/NeMo --shm-size=8g -p 8888:8888 -p 6006:6006 --ulimit memlock=-1 --ulimit stack=67108864 nvcr.io/nvidia/pytorch:19.11-py3`` +THE LATEST STABLE VERSION OF NeMo is **0.10.1** (Available via PIP). -.. code-block:: bash +**Requirements** - pip install nemo-toolkit # installs NeMo Core - pip install nemo-asr # installs NeMo ASR collection - pip install nemo-nlp # installs NeMo NLP collection - pip install nemo-tts # installs NeMo TTS collection +1) Python 3.6 or 3.7 +2) PyTorch 1.4.* with GPU support +3) (optional, for best performance) NVIDIA APEX. 
Install from here: https://github.com/NVIDIA/apex -* DEVELOPMENT: If you'd like to use master branch and/or develop NeMo you can run "reinstall.sh" script. -**Documentation** +Docker containers +~~~~~~~~~~~~~~~~~ -`NeMo documentation `_ +**NeMo docker container** + +You can use NeMo's docker container with all dependencies pre-installed + +.. code-block:: bash + + docker run --runtime=nvidia -it --rm -v --shm-size=16g -p 8888:8888 -p 6006:6006 --ulimit memlock=-1 --ulimit stack=67108864 nvcr.io/nvidia/nemo:v0.10 + + +If you are using the NVIDIA `NGC PyTorch container `_ follow these instructions -See `examples/start_here` to get started with the simplest example. The folder `examples` contains several examples to get you started with various tasks in NLP and ASR. +* Pull the docker: ``docker pull nvcr.io/nvidia/pytorch:20.01-py3`` +* Run:``docker run --gpus all -it --rm -v :/NeMo --shm-size=8g -p 8888:8888 -p 6006:6006 --ulimit memlock=-1 --ulimit stack=67108864 nvcr.io/nvidia/pytorch:20.01-py3`` +* ``apt-get update && apt-get install -y libsndfile1`` +* ``pip install nemo_toolkit`` Installs NeMo core only. +* ``pip install nemo_toolkit[all]`` Installs NeMo core and ALL collections +* ``pip install nemo_toolkit[asr]`` Installs NeMo core and ASR (Speech Recognition) collection +* ``pip install nemo_toolkit[nlp]`` Installs NeMo core and NLP (Natural Language Processing) collection +* ``pip install nemo_toolkit[tts]`` Installs NeMo core and TTS (Speech Synthesis) collection +See `examples/start_here` to get started with the simplest example. **Tutorials** @@ -88,17 +100,62 @@ See `examples/start_here` to get started with the simplest example. The folder ` * `Natural language processing `_ * `Speech Synthesis `_ +Pre-trained models +~~~~~~~~~~~~~~~~~~ + ++------------+----------------------------------------------------------------------------------------------+-----------------------+ +| Modality | Model | Trained on | ++============+==============================================================================================+=======================+ +| ASR | `QuartzNet15x5En `_ | LibriSpeech, WSJ, | +| | | Mozilla Common Voice | +| | | (en_1488_2019-12-10), | +| | | Fisher, Switchboard | +| | | and Singapore English | +| | | National Speech | +| | | Corpus | ++------------+----------------------------------------------------------------------------------------------+-----------------------+ +| ASR | `QuartzNet15x5Zh `_ | AISHELL-2 Mandarin | +| | | | +| | | | +| | | | ++------------+----------------------------------------------------------------------------------------------+-----------------------+ +| NLP | `BERT base uncased `_ |English Wikipedia and | +| | |BookCorpus dataset | +| | |seq len <= 512 | +| | | | ++------------+----------------------------------------------------------------------------------------------+-----------------------+ +| NLP | `BERT large uncased `_ |English Wikipedia and | +| | |BookCorpus dataset | +| | |seq len <= 512 | +| | | | ++------------+----------------------------------------------------------------------------------------------+-----------------------+ +| TTS | `Tacotron2 `_ |LJspeech | +| | | | +| | | | +| | | | ++------------+----------------------------------------------------------------------------------------------+-----------------------+ +| TTS | `WaveGlow `_ |LJspeech | +| | | | +| | | | +| | | | ++------------+----------------------------------------------------------------------------------------------+-----------------------+ + + 
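The ASR checkpoints in the table above can be exercised with the ``speech2text_infer.py`` example referenced in the CI stages earlier in this diff. A minimal sketch, assuming NeMo is installed and a LibriSpeech-style JSON manifest is available; the manifest path and WER threshold below are illustrative placeholders borrowed from the CI stage, not required values:

.. code-block:: bash

    # Greedy CTC inference with a pretrained English ASR checkpoint.
    # --asr_model takes a pretrained model name (the CI stages use
    # QuartzNet15x5-En and JasperNet10x5-En), --dataset points to a JSON
    # manifest, and --wer_target is used to check the resulting word error
    # rate against an expected value.
    cd examples/asr
    CUDA_VISIBLE_DEVICES=0 python speech2text_infer.py \
        --asr_model=QuartzNet15x5-En \
        --dataset=/path/to/librivox-dev-other.json \
        --wer_target=0.1060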
+DEVELOPMENT +~~~~~~~~~~~ +If you'd like to use the master branch and/or develop NeMo, you can run the "reinstall.sh" script. + +`Documentation (master branch) `_. + **Installing From Github** If you prefer to use NeMo's latest development version (from GitHub) follow the steps below: -*Note*: For step 2 and 3, if you want to use NeMo in development mode, use: ``pip install -e .`` instead of ``pip install .`` - 1) Clone the repository ``git clone https://github.com/NVIDIA/NeMo.git`` 2) Go to NeMo folder and re-install the toolkit with collections: .. code-block:: bash - + ./reinstall.sh **Style tests** @@ -109,14 +166,33 @@ If you prefer to use NeMo's latest development version (from GitHub) follow the python setup.py style --fix # Tries to fix error in-place. python setup.py style --scope=tests # Operates within certain scope (dir of file). -**Unittests** +**NeMo Test Suite** + +NeMo contains a test suite divided into 5 subsets: + 1) ``unit``: unit tests, i.e. testing a single, well-isolated functionality + 2) ``integration``: tests checking the elements when integrated into subsystems + 3) ``system``: tests working at the highest integration level + 4) ``acceptance``: tests checking whether the developed product/model passes the user-defined acceptance criteria + 5) ``docs``: tests related to documentation (deselect with '-m "not docs"') -This command runs unittests: +The user can run all the tests locally by simply executing: .. code-block:: bash - ./reinstall.sh - python -m unittest tests/*.py + pytest + +In order to run a subset of tests one can use the ``-m`` argument followed by the subset name, e.g. for the ``system`` subset: + +.. code-block:: bash + + pytest -m system + +By default, all the tests will be executed on GPU. There is also an option to run the test suite on CPU +by passing the ``--cpu`` command line argument, e.g.: + +.. code-block:: bash + + pytest -m unit --cpu Citation @@ -124,11 +200,14 @@ Citation If you are using NeMo please cite the following publication -@misc{nemo2019, - title={NeMo: a toolkit for building AI applications using Neural Modules}, - author={Oleksii Kuchaiev and Jason Li and Huyen Nguyen and Oleksii Hrinchuk and Ryan Leary and Boris Ginsburg and Samuel Kriman and Stanislav Beliaev and Vitaly Lavrukhin and Jack Cook and Patrice Castonguay and Mariya Popova and Jocelyn Huang and Jonathan M. Cohen}, - year={2019}, - eprint={1909.09577}, - archivePrefix={arXiv}, - primaryClass={cs.LG} -} +.. code-block:: tex + + @misc{nemo2019, + title={NeMo: a toolkit for building AI applications using Neural Modules}, + author={Oleksii Kuchaiev and Jason Li and Huyen Nguyen and Oleksii Hrinchuk and Ryan Leary and Boris Ginsburg and Samuel Kriman and Stanislav Beliaev and Vitaly Lavrukhin and Jack Cook and Patrice Castonguay and Mariya Popova and Jocelyn Huang and Jonathan M.
Cohen}, + year={2019}, + eprint={1909.09577}, + archivePrefix={arXiv}, + primaryClass={cs.LG} + } + diff --git a/docs/docs_zh/.nojekyll b/docs/docs_zh/.nojekyll deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/docs/docs_zh/sources/source/asr/asr_all.bib b/docs/docs_zh/sources/source/asr/asr_all.bib index f27530d6d5e3..3cdd9c68f9d2 100644 --- a/docs/docs_zh/sources/source/asr/asr_all.bib +++ b/docs/docs_zh/sources/source/asr/asr_all.bib @@ -51,6 +51,16 @@ @article{li2019jasper year={2019} } +@misc{ardila2019common, + title={Common Voice: A Massively-Multilingual Speech Corpus}, + author={Rosana Ardila and Megan Branson and Kelly Davis and Michael Henretty and Michael Kohler and Josh Meyer and Reuben Morais and Lindsay Saunders and Francis M. Tyers and Gregor Weber}, + year={2019}, + eprint={1912.06670}, + archivePrefix={arXiv}, + primaryClass={cs.CL} +} + + @article{graves2012, title={Sequence Transduction with Recurrent Neural Networks}, @@ -915,3 +925,10 @@ @article{novograd2019 eid = {arXiv:1905.11286}, eprint = {1905.11286}, } + +@article{kriman2019quartznet, + title={Quartznet: Deep automatic speech recognition with 1d time-channel separable convolutions}, + author={Kriman, Samuel and Beliaev, Stanislav and Ginsburg, Boris and Huang, Jocelyn and Kuchaiev, Oleksii and Lavrukhin, Vitaly and Leary, Ryan and Li, Jason and Zhang, Yang}, + journal={arXiv preprint arXiv:1910.10261}, + year={2019} +} \ No newline at end of file diff --git a/docs/docs_zh/sources/source/asr/datasets.rst b/docs/docs_zh/sources/source/asr/datasets.rst index 178d223f036a..53fbff8b944b 100644 --- a/docs/docs_zh/sources/source/asr/datasets.rst +++ b/docs/docs_zh/sources/source/asr/datasets.rst @@ -7,7 +7,7 @@ LibriSpeech ----------- 运行下面的脚本下载 LibriSpeech 数据集,并把它转换成 `nemo_asr` 集合需要的格式。 -你至少需要 110GB 的空间。 +你至少需要 250GB 的空间。 .. code-block:: bash @@ -110,6 +110,21 @@ Fisher English Training Speech 你可以选择性的加入 ``--min_slice_duration=`` 如果你想改变最小音频片段长度。 +AN4 数据集 +----------- + +这是一个由卡内基梅隆大学录制和提供的小数据集。它包含很多人说的地址、姓名等内容。关于这个数据集的信息可以在这找到 `official CMU site `_ 。 + +请下载并解压数据集(其被标注为“NIST” Sphere(.sph)音频文件格式(64M)在如下的链接中):http://www.speech.cs.cmu.edu/databases/an4/an4_sphere.tar.gz. + +运行如下的脚本,使用 sox 将 .sph 文件转换成 .wav 格式,同时构建训练和测试清单。 + +.. code-block:: bash + + python process_an4_data.py --data_root= + +当这个脚本运行结束时,你应该可以在 `/an4/` 文件夹下得到 `train_manifest.json` 和 `test_manifest.json` 两个文件。 + AISHELL-1 --------- diff --git a/docs/docs_zh/sources/source/asr/jasper.rst b/docs/docs_zh/sources/source/asr/jasper.rst index c805809a4258..130a58f53e35 100644 --- a/docs/docs_zh/sources/source/asr/jasper.rst +++ b/docs/docs_zh/sources/source/asr/jasper.rst @@ -10,7 +10,7 @@ Jasper 家族的模型可以用 Jasper_[BxR] 来表示,其中 B 是块的个 :align: center :alt: japer model -预训练的模型在 `这里 `_ 。 +预训练的模型如下: ============= ======================= ================================================================================= Network Dataset Download Link diff --git a/docs/docs_zh/sources/source/asr/quartznet.rst b/docs/docs_zh/sources/source/asr/quartznet.rst index 72344e64ba2a..85eb724fbe22 100644 --- a/docs/docs_zh/sources/source/asr/quartznet.rst +++ b/docs/docs_zh/sources/source/asr/quartznet.rst @@ -9,9 +9,9 @@ QuartzNet 是 Jasper模型 :cite:`asr-models-li2019jasper` 的一个版本, 它 :align: center :alt: quartznet model - .. note:: 这个checkpoint是在LibriSpeech上训练的,完全在EN Mozilla Common Voice的部分数据集上做的“验证” + .. 
note:: 这个 checkpoint 是在 LibriSpeech :cite:`panayotov2015librispeech` 上训练的,完全在EN Mozilla Common Voice :cite:`ardila2019common` 的部分数据集上做的“验证” -我们正在写 QuartzNet 的论文,不久就会发布。 +`QuartzNet 论文 `_. 预训练的模型在 `这里 `__ 。 @@ -20,8 +20,11 @@ Network Dataset Download Link ============= ===================== ============================================================================== QuartzNet15x5 Librispeech, `这里 `__ Mozilla Common Voice -QuartzNet15x5 Librispeech, `这里 `__ - Mozilla Common Voice, - WSJ QuartzNet15x5 Aishell2 `这里 `__ ============= ===================== ============================================================================== + +References +---------- + +.. bibliography:: asr_all.bib + :style: plain \ No newline at end of file diff --git a/docs/docs_zh/sources/source/asr/tutorial.rst b/docs/docs_zh/sources/source/asr/tutorial.rst index 35221392db75..2bb02aeaa417 100644 --- a/docs/docs_zh/sources/source/asr/tutorial.rst +++ b/docs/docs_zh/sources/source/asr/tutorial.rst @@ -7,18 +7,21 @@ .. note:: 在这个教程中你只需要用到 `nemo` 和 `nemo_asr` 。 +一个基础的 ASR Jupyter教程可以参见 `GitHub `_ 。 + + 简介 ------------- -这个教程中我们使用 Jasper :cite:`asr-tut-li2019jasper` 模型。Jasper 是一个基于 CTC :cite:`asr-tut-graves2006` 的端到端的语音识别模型。这个模型之所以被称之为“端到端”是因为它在不需要额外的对齐信息下就可以把输入的音频样本转到对应的文本上。 +这个 ASR 教程中我们使用 QuartzNet :cite:`asr-tut-kriman2019quartznet` 模型。QuartzNet 是一个基于 CTC :cite:`asr-tut-graves2006` 的端到端的语音识别模型。这个模型之所以被称之为“端到端”是因为它在不需要额外的对齐信息下就可以把输入的音频样本转到对应的文本上。 CTC 可以在音频和文本中找到对齐方式。基于 CTC 的语音识别管道包含了下面的这些模块: 1. 音频预处理(特征提取):信号正则化,窗口化,(log)频谱(梅尔谱或者 MFCC) 2. 神经网络声学模型(在给定的每个时间步上的输入特征下,预测词表中字符c的概率分布 P_t(c)) 3. CTC 损失函数 - .. image:: ctc_asr.png - :align: center - :alt: CTC-based ASR +.. image:: ctc_asr.png + :align: center + :alt: CTC-based ASR 获取数据 -------- @@ -36,7 +39,7 @@ CTC 可以在音频和文本中找到对齐方式。基于 CTC 的语音识别 # python get_librispeech_data.py --data_root=data --data_set=ALL .. note:: - 如果用 ``--data_set=dev_clean,train_clean_100`` ,你的磁盘空间至少需要 26GB。如果用 ``--data_set=ALL`` ,你的磁盘空间至少需要 110GB。下载和处理都需要一段时间,所以休息一下下吧。 + 如果用 ``--data_set=dev_clean,train_clean_100`` ,你的磁盘空间至少需要 52GB。如果用 ``--data_set=ALL`` ,你的磁盘空间至少需要 250GB。下载和处理都需要一段时间,所以休息一下下吧。下载完成后,你可以删除原始的 .tar.gz 和 .flac 文件,这样会减少一半的硬盘使用。 下载和转换后, 你的 `data` 文件夹应该包含两个 Json 文件: @@ -56,13 +59,13 @@ Json 文件中的每一行都指的是一个训练样本 `audio_filepath` 包含 训练 ---- -我们会在 Jasper 家族 :cite:`asr-tut-li2019jasper` 中训练一个小模型。 -Jasper(Just Another SPeech Recognizer)是一个深度时延网络 (TDNN) 包含了一维卷积层的块(blocks)。 -Jasper 家族的模型的结构可以这样表示 Jasper_[BxR] 其中 B 是块的个数, R 表示的是一个块中卷积子块的个数。每个子块包含了一个一维卷积层,一层 batch normalization,一个 ReLU 激活函数,和一个 dropout 层: +我们会在 QuartzNet 家族 :cite:`asr-tut-kriman2019quartznet` 中训练一个小模型。 +QuartzNet 是一个深度时延网络 (TDNN) 包含了一维卷积层的块(blocks)。 +QuartzNet 家族的模型的结构可以这样表示 QuartzNet_[BxR] 其中 B 是块的个数, R 表示的是一个块中卷积子块的个数。每个子块包含了一个一维卷积层,一层 batch normalization,一个 ReLU 激活函数,和一个 dropout 层: - .. image:: jasper.png - :align: center - :alt: japer model +.. 
image:: quartz_vertical.png + :align: center + :alt: quartznet model 在这个教程中我们会使用 [12x1] 的模型结构并且会用分开的卷积。 下面脚本的训练(on `train_clean_100.json` )和评估(on `dev_clean.json` )都是在一块GPU上: @@ -83,7 +86,7 @@ Jasper 家族的模型的结构可以这样表示 Jasper_[BxR] 其中 B 是块 # 创建 Neural Factory # 它会为我们创建日志文件和 tensorboard 记录器 nf = nemo.core.NeuralModuleFactory( - log_dir='jasper12x1SEP', + log_dir='QuartzNet12x1', create_tb_writer=True) tb_writer = nf.tb_writer @@ -93,15 +96,15 @@ Jasper 家族的模型的结构可以这样表示 Jasper_[BxR] 其中 B 是块 # 到验证集列表文件的路径 eval_datasets = "/dev_clean.json" - # Jasper 模型定义 + # QuartzNet 模型定义 from ruamel.yaml import YAML # 这里我们用可分离卷积 # with 12 blocks (k=12 repeated once r=1 from the picture above) yaml = YAML(typ="safe") - with open("/examples/asr/configs/jasper12x1SEP.yaml") as f: - jasper_model_definition = yaml.load(f) - labels = jasper_model_definition['labels'] + with open("/examples/asr/configs/quartznet12x1.yaml") as f: + quartznet_model_definition = yaml.load(f) + labels = quartznet_model_definition['labels'] # 初始化神经模块 data_layer = nemo_asr.AudioToTextDataLayer( @@ -114,10 +117,10 @@ Jasper 家族的模型的结构可以这样表示 Jasper_[BxR] 其中 B 是块 data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor() spec_augment = nemo_asr.SpectrogramAugmentation(rect_masks=5) - jasper_encoder = nemo_asr.JasperEncoder( + encoder = nemo_asr.JasperEncoder( feat_in=64, - **jasper_model_definition['JasperEncoder']) - jasper_decoder = nemo_asr.JasperDecoderForCTC( + **quartznet_model_definition['JasperEncoder']) + decoder = nemo_asr.JasperDecoderForCTC( feat_in=1024, num_classes=len(labels)) ctc_loss = nemo_asr.CTCLossNM(num_classes=len(labels)) greedy_decoder = nemo_asr.GreedyCTCDecoder() @@ -127,9 +130,9 @@ Jasper 家族的模型的结构可以这样表示 Jasper_[BxR] 其中 B 是块 processed_signal, processed_signal_len = data_preprocessor( input_signal=audio_signal, length=audio_signal_len) aug_signal = spec_augment(input_spec=processed_signal) - encoded, encoded_len = jasper_encoder( + encoded, encoded_len = encoder( audio_signal=aug_signal, length=processed_signal_len) - log_probs = jasper_decoder(encoder_output=encoded) + log_probs = decoder(encoder_output=encoded) predictions = greedy_decoder(log_probs=log_probs) loss = ctc_loss( log_probs=log_probs, targets=transcript, @@ -141,9 +144,9 @@ Jasper 家族的模型的结构可以这样表示 Jasper_[BxR] 其中 B 是块 processed_signal_v, processed_signal_len_v = data_preprocessor( input_signal=audio_signal_v, length=audio_signal_len_v) # 注意我们再验证 DAG 的时候不会用数据增强 - encoded_v, encoded_len_v = jasper_encoder( + encoded_v, encoded_len_v = encoder( audio_signal=processed_signal_v, length=processed_signal_len_v) - log_probs_v = jasper_decoder(encoder_output=encoded_v) + log_probs_v = decoder(encoder_output=encoded_v) predictions_v = greedy_decoder(log_probs=log_probs_v) loss_v = ctc_loss( log_probs=log_probs_v, targets=transcript_v, @@ -208,7 +211,7 @@ Jasper 家族的模型的结构可以这样表示 Jasper_[BxR] 其中 B 是块 ) .. note:: - 这个脚本在 GTX1080 上完成 50 轮训练需要大约 7 小时 + 这个脚本在 GTX1080 上完成 50 轮训练需要大约 7 小时。你可以得到的 WER 应该在30%左右。 .. tip:: 进一步提升 WER: @@ -231,7 +234,6 @@ NeMo 中的混合精度和分布式训练是基于 `英伟达的 APEX 库 /examples/asr/jasper.py ... - + python -m torch.distributed.launch --nproc_per_node= /examples/asr/quartznet.py ... 大量训练样本例子 ~~~~~~~~~~~~~~~~~~~~~~ -请参考 `/examples/asr/jasper.py` , 该实例做一个更全面的理解。它构建了一个训练的有向无环图,在不同的验证集上构建了多达三个有向无环图。 +请参考 `/examples/asr/quartznet.py` , 该实例做一个更全面的理解。它构建了一个训练的有向无环图,在不同的验证集上构建了多个有向无环图。每个验证 DAG 与训练 DAG 共享相同的模型和参数,可以用于评估不同的数据集。 假设你能够使用基于 Volta 架构的的 DGX 服务器,你可以这样运行: .. 
code-block:: bash - python -m torch.distributed.launch --nproc_per_node= /examples/asr/jasper.py --batch_size=64 --num_epochs=100 --lr=0.015 --warmup_steps=8000 --weight_decay=0.001 --train_dataset=/manifests/librivox-train-all.json --eval_datasets /manifests/librivox-dev-clean.json /manifests/librivox-dev-other.json --model_config=/nemo/examples/asr/configs/quartznet15x5.yaml --exp_name=MyLARGE-ASR-EXPERIMENT + python -m torch.distributed.launch --nproc_per_node= /examples/asr/quartznet.py --batch_size=64 --num_epochs=100 --lr=0.015 --warmup_steps=8000 --weight_decay=0.001 --train_dataset=/manifests/librivox-train-all.json --eval_datasets /manifests/librivox-dev-clean.json /manifests/librivox-dev-other.json --model_config=/nemo/examples/asr/configs/quartznet15x5.yaml --exp_name=MyLARGE-ASR-EXPERIMENT 上面的命令会运行一个8 GPU 的混合精度训练。其中不同的列表文件(.json)文件是不同的数据集。你可以用你的数据来替代它们。 @@ -278,10 +279,10 @@ NeMo 中的混合精度和分布式训练是基于 `英伟达的 APEX 库 /15x5SEP/JasperEncoder-STEP-247400.pt") - jasper_decoder.restore_from("/15x5SEP/JasperDecoderForCTC-STEP-247400.pt") + encoder.restore_from("/15x5SEP/JasperEncoder-STEP-247400.pt") + decoder.restore_from("/15x5SEP/JasperDecoderForCTC-STEP-247400.pt") # 防止是分布式训练加入 args.local_rank - jasper_decoder.restore_from("/15x5SEP/JasperDecoderForCTC-STEP-247400.pt", args.local_rank) + decoder.restore_from("/15x5SEP/JasperDecoderForCTC-STEP-247400.pt", args.local_rank) .. tip:: 微调的时候,用小一点的学习率。 @@ -294,7 +295,7 @@ NeMo 中的混合精度和分布式训练是基于 `英伟达的 APEX 库 /examples/asr/jasper_infer.py --model_config=/examples/asr/configs/quartznet15x5.yaml --eval_datasets "/dev_clean.json" --load_dir= + python /examples/asr/jasper_eval.py --model_config=/examples/asr/configs/quartznet15x5.yaml --eval_datasets "/dev_clean.json" --load_dir= 用语言模型推理 @@ -311,13 +312,34 @@ NeMo 中的混合精度和分布式训练是基于 `英伟达的 APEX 库 /examples/asr/jasper_infer.py --model_config=/examples/asr/configs/quartznet15x5.yaml --eval_datasets "/dev_clean.json" --load_dir= --lm_path= + python /examples/asr/jasper_eval.py --model_config=/examples/asr/configs/quartznet15x5.yaml --eval_datasets "/dev_clean.json" --load_dir= --lm_path= + +Kaldi 兼容性 +------------------- + +在 ``nemo_asr`` 中,可以使用 ``KaldiFeatureDataLayer`` 来读取 Kaldi 格式的数据集。 +为了读取 Kaldi 格式的数据,你需要提供一个文件夹,其中包含以下文件: + +* ``feats.scp``, 这个文件将句子 ID 映射到.ark文件,.ark文件中存放了相应的音频数据。 +* ``text``, 这个文件把句子 ID 映射到文本标注。 +* (可选) ``utt2dur``, 这个文件把句子 ID 映射到音频数据的时长,如果你要基于时长切分音频的话,这个文件是必需的。 + +当然,.ark文件中包含了相应的音频数据,与 ``feats.scp`` 文件中所提供的位置一致。 + +为了加载Kaldi格式的数据,你需要使用 ``KaldiFeatureDataLayer`` 而不是 ``AudioToTextDataLayer`` 。 +``KaldiFeatureDataLayer`` 层接收 ``kaldi_dir`` 这个参数,而不是 ``manifest_filepath`` ,这个参数需要设置成包含如上所述文件的目录。 +参见 `文档 `_ 来获取这个层参数的更多详细信息。 + +.. note:: + + 如果你切换到 ``KaldiFeatureDataLayer``,请确保任何 ``feat_in`` 参数都正确表示了 Kaldi 特征的维度(例如在encoder中)。此外,你的数据应该很可能被预处理过(例如,MFCC格式),这种情况下,你并不需要 ``AudioToMelSpectrogramPreprocessor`` 中的任何音频前处理。 参考 diff --git a/docs/docs_zh/sources/source/collections/nemo_nlp.rst b/docs/docs_zh/sources/source/collections/nemo_nlp.rst index 223cd71d3b6d..3d967ca44f5a 100644 --- a/docs/docs_zh/sources/source/collections/nemo_nlp.rst +++ b/docs/docs_zh/sources/source/collections/nemo_nlp.rst @@ -49,7 +49,6 @@ NLP 分词器 NLP 神经模块 ------------------ - .. automodule:: nemo.collections.nlp.nm.data_layers :members: :undoc-members: @@ -88,13 +87,13 @@ NLP 神经模块 :show-inheritance: :exclude-members: forward -.. automodule:: nemo.collections.nlp.nm.trainables.dialogue_state_tracking.state_tracking_trade_nm +.. 
automodule:: nemo.collections.nlp.nm.trainables.dialogue_state_tracking.trade_generator_nm :members: :undoc-members: :show-inheritance: :exclude-members: forward -.. automodule:: nemo.collections.nlp.nm.trainables.joint_intent_slot.joint_intent_slot_nm +.. automodule:: nemo.collections.nlp.nm.trainables.joint_intent_slot.joint_intent_slot_classifier_nm :members: :undoc-members: :show-inheritance: @@ -108,3 +107,15 @@ NLP Hugging Face 神经模块 :undoc-members: :show-inheritance: :exclude-members: forward + +.. automodule:: nemo.collections.nlp.nm.trainables.common.huggingface.albert_nm + :members: + :undoc-members: + :show-inheritance: + :exclude-members: forward + +.. automodule:: nemo.collections.nlp.nm.trainables.common.huggingface.roberta_nm + :members: + :undoc-members: + :show-inheritance: + :exclude-members: forward diff --git a/docs/docs_zh/sources/source/conf.py b/docs/docs_zh/sources/source/conf.py index b8220000049e..00e6b79bcd3f 100644 --- a/docs/docs_zh/sources/source/conf.py +++ b/docs/docs_zh/sources/source/conf.py @@ -13,39 +13,18 @@ # All configuration values have a default; values that are commented out # infer to show the default. -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# import os import sys -from unittest.mock import MagicMock - -import nemo - -sys.path.insert(0, os.path.abspath(".")) -sys.path.insert(0, os.path.abspath("../../../")) - -# ---- Mocking up the classes. ----- -MOCK_CLASSES = {'Dataset': 'torch.utils.data', 'Module': 'torch.nn'} - -class Mock(MagicMock): - @classmethod - def __getattr__(cls, name): - if name in MOCK_CLASSES: - # return object # Sphinx renders object in base classes - return type(name, (object,), {'__module__': MOCK_CLASSES[name]}) - elif name == '__file__': - return "FOO" - elif name == '__loader__': - return "BAR" - return MagicMock() +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +sys.path.insert(0, os.path.abspath("../../../..")) +sys.path.insert(0, os.path.abspath(os.path.join("../../../..", "nemo"))) -# ---- Mocking up the python modules. ----- -MOCK_MODULES = [ +autodoc_mock_imports = [ 'torch', 'torch.nn', 'torch.utils', @@ -55,13 +34,33 @@ def __getattr__(cls, name): 'torchvision', 'torchvision.models', 'torchtext', + 'torch_stft', 'h5py', 'kaldi_io', 'transformers', 'transformers.tokenization_bert', + 'apex', + 'ruamel', + 'frozendict', + 'inflect', + 'unidecode', + 'librosa', + 'soundfile', + 'sentencepiece', + 'youtokentome', + 'megatron-lm', + 'numpy', + 'dateutil', + 'wget', + 'scipy', + 'pandas', + 'matplotlib', + 'sklearn', + 'braceexpand', + 'webdataset', + 'tqdm', ] -sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES) # -- General configuration ------------------------------------------------ @@ -84,6 +83,9 @@ def __getattr__(cls, name): "sphinxcontrib.bibtex", ] +locale_dirs = ['locale/'] # path is example but recommended. +gettext_compact = False # optional. + # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] @@ -98,17 +100,21 @@ def __getattr__(cls, name): # General information about the project. 
project = "nemo" -copyright = "2018-2019, NVIDIA" +copyright = "2018-2020, NVIDIA" author = "NVIDIA" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. -# + +from package_info import __version__ + # The short X.Y version. -version = "0.9.0" +# version = "0.10.0" +version = __version__ # The full version, including alpha/beta/rc tags. -release = "0.9.0" +# release = "0.9.0" +release = __version__ # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/docs/docs_zh/sources/source/index.rst b/docs/docs_zh/sources/source/index.rst index 206306d7b398..5287a28e5659 100644 --- a/docs/docs_zh/sources/source/index.rst +++ b/docs/docs_zh/sources/source/index.rst @@ -1,15 +1,15 @@ -NVIDIA Neural Modules: NeMo 中文文档 -===================================== +NVIDIA NeMo 开发者指南 +====================== .. toctree:: :hidden: :maxdepth: 2 简介 - installation tutorials/intro training asr/intro + speech_command/intro nlp/intro tts/intro collections/modules @@ -17,8 +17,7 @@ NVIDIA Neural Modules: NeMo 中文文档 - -Neural Modules (NeMo) 是一个用神经模块来构建 AI 应用的工具包,它与具体的框架无关。当前支持 PyTorch 框架。 +NeMo 是一个用神经模块来构建 AI 应用的工具包,它与具体的框架无关。当前支持 PyTorch 框架。 一个“神经模块”指的是,根据一系列的输入来计算一系列输出的代码块。 @@ -26,7 +25,9 @@ Neural Modules (NeMo) 是一个用神经模块来构建 AI 应用的工具包, 用 NeMo 构建的应用,是一个由连接在一起的模块构成的有向无环图,研究者们可以很容易地通过 API 兼容的模块,定义和构建新的语音或语言神经网络。 -**简介** + +简介 +---- 我们可以通过以下这个视频有个概览: @@ -37,7 +38,8 @@ Neural Modules (NeMo) 是一个用神经模块来构建 AI 应用的工具包, -**核心概念和特性** +核心概念和特性 +-------------- * `NeuralModule` 类 - 表示以及执行一个神经模块。 * `NmTensor` - 表示的是神经模块端口之间流动的激活元。 @@ -47,87 +49,95 @@ Neural Modules (NeMo) 是一个用神经模块来构建 AI 应用的工具包, * `Collections` - NeMo中附带的模块集合 - 与其相关的模块集合,例如, `nemo_asr` (语音识别) 以及 `nemo_nlp` (自然语言处理) -**安装依赖** +安装依赖 +-------- 1) Python 3.6 or 3.7 -2) PyTorch 1.2 with GPU support -3) NVIDIA APEX: https://github.com/NVIDIA/apex +2) PyTorch >= 1.4 带GPU支持 +3) (可选)NVIDIA APEX: https://github.com/NVIDIA/apex +.. _installation: -**开始吧** +开始吧 +------- -你可以从这个 docker 容器开始 `NGC PyTorch容器 `_ 这里面已经包含了上面所需要的环境。 +你可以从这个 docker 容器开始 `NGC NeMo 容器 `_ 这里面已经包含了最新版的 NeMo 和上面所需要的环境。 .. code-block:: bash - # pull相应的 docker 容器 - docker pull nvcr.io/nvidia/pytorch:19.10-py3 - - # 运行下面两个命令之一 - # 如果你的 docker 版本 <19.03 - nvidia-docker run -it --rm -v :/NeMo --shm-size=1g -p 8888:8888 -p 6006:6006 --ulimit memlock=-1 --ulimit stack=67108864 nvcr.io/nvidia/pytorch:19.10-py3 - - # 如果你的 docker 版本 >=19.03 - docker run --runtime=nvidia -it --rm -v :/NeMo --shm-size=1g -p 8888:8888 -p 6006:6006 --ulimit memlock=-1 --ulimit stack=67108864 nvcr.io/nvidia/pytorch:19.10-py3 + # Pull the docker + docker pull nvcr.io/nvidia/nemo:v0.10 - cd /NeMo + # Run Docker for docker version >=19.03 + docker run --gpus all -it --rm -v :/NeMo --shm-size=8g -p 8888:8888 -p 6006:6006 --ulimit memlock=-1 --ulimit stack=67108864 nvcr.io/nvidia/nemo:v0.10 - -接着运行下面的步骤 +马上开始用 NeMo 吧。 如果你已经安装了所有依赖(或者使用了 `NGC PyTorch容器 `_ ) 那么你只要简单的用 pip来安装最新的 NeMo 和 NeMo 集合即可 .. 
code-block:: bash - pip install nemo-toolkit # 安装 NeMo Core - pip install nemo-asr # 安装 NeMo asr 集合 - pip install nemo-nlp # 安装 NeMo nlp 集合 - pip install nemo-tts # 安装 NeMo tts 集合 - -**教程** + pip install nemo-toolkit[all] # 安装 NeMo 核心和所有集合(nemo_asr, nemo_nlp, nemo_tts) -* `语音识别 `_ -* `自然语言处理 `_ -* `语音合成 `_ +教程 +------ +* `语音识别 `_ +* `自然语言处理 `_ +* `语音合成 `_ -**从github上安装** +从github上安装 +-------------- 如果你更想用 NeMo 最新的开发版本(从 github上 获取),请按照下面的步骤: -*Note*: 对于下面的步骤2和3,如果你想在开发模式下用 NeMo,用: ``pip install -e .`` 而不是 ``pip install .`` - 1) 克隆这个仓库 ``git clone https://github.com/NVIDIA/NeMo.git`` -2) 切到 nemo 文件夹下,安装工具包: +2) 切到 nemo 文件夹下,安装工具包和各个集合: .. code-block:: bash - cd NeMo/nemo - pip install . + ./reinstall.sh + +.. note:: + reinstall.sh 是在开发者模式下安装 NeMo -3) 安装 collections: +单元测试 +--------- + +下面这个命令会运行单元测试: .. code-block:: bash - # 从 collections/nemo_asr 下安装 ASR 集合 - apt-get install libsndfile1 - cd NeMo/collections/nemo_asr - pip install . + ./reinstall.sh + python -m unittest tests/*.py - # 从 collections/nemo_nlp 下安装 NLP 集合 - cd NeMo/collections/nemo_nlp - pip install . +构建 Docker 容器 +----------------- - # 从 collections/nemo_tts 下安装 TTS 集合 - cd NeMo/collections/nemo_tts - pip install . +NeMo Docker 镜像需要 Docker Buildx (包含在 Docker 19.03)。 想要构建一个自定义的 NeMo Docker 镜像, 运行 -**单元测试** +.. code-block:: bash -下面这个命令会运行单元测试: + docker buildx build --build-arg NEMO_VERSION=$(git describe --tags) -t nemo . + +``NEMO_VERSION``参数是必须的。我们推荐设置为 ``git describe --tags`` 这样构建就可追溯和可复现的。 +在运行时候, ``NEMO_VERSION`` 在构建时候指定的值会变成环境变量。 + +你也可以指定参数 ``BASE_IMAGE`` 来重载底层版本的 Pytorch,不过,不保证兼容性。 + +为了开发需要, 你也可以构建一个只包含 NeMo 依赖的Docker镜像。 +在运行时,把你本地的开发分支映射到容器中。 .. code-block:: bash + # 构建开发容器 + docker buildx build --build-arg BASE_IMAGE=nvcr.io/nvidia/pytorch:20.01-py3 --target nemo-deps -t nemo-devel . + + # 运行容器, 把本地 nemo 映射进去 + cd + docker run -it --rm --gpus all -v $(pwd):/workspace/nemo --shm-size=8g -p 8888:8888 -p 6006:6006 --ulimit memlock=-1 --ulimit stack=67108864 nvcr.io/nvidia/nemo:v0.10 + + # 在开发模式下安装 ./reinstall.sh - python -m unittest tests/*.py + diff --git a/docs/docs_zh/sources/source/installation.rst b/docs/docs_zh/sources/source/installation.rst deleted file mode 100644 index 74b155c9201c..000000000000 --- a/docs/docs_zh/sources/source/installation.rst +++ /dev/null @@ -1,44 +0,0 @@ -.. _installation: - -如何安装 -======== - -**依赖组件** - -1) Python 3.6 or 3.7 -2) `CUDA `_ >= 10.0 -3) `cuDNN `_ >= 7.6 -4) `APEX `_ -5) PyTorch >=1.2 -6) 对于多 GPU 或者分布式训练,推荐安装: `NCCL `_ >= 2.4 - -.. tip:: - 您还可以不安装这些依赖,直接使用 `英伟达的 PyTorch 镜像 `_ . - 在该镜像中,所有的依赖都已提前为您安装好。方便您直接使用。 - -**安装 NeMo 及其 Collections** - -1) 克隆代码库: - -.. code-block:: bash - - git clone https://github.com/NVIDIA/nemo - -2) 切换到 ``nemo`` 文件夹并运行: ``python setup.py install`` - -3) 安装 collections - - a) ASR: 进入 ``collections/nemo_asr`` 文件夹并运行 ``sudo apt-get install libsndfile1 && python setup.py install`` - b) NLP: 进入 ``collections/nemo_nlp`` 文件夹并运行 ``python setup.py install`` - c) LPR: 进入 ``collections/nemo_simple_gan`` 文件夹并运行 ``python setup.py install`` - -对于开发模型,请运行 ``python setup.py develop`` - -4) 运行单元测试以验证是否安装成功: - -.. 
code-block:: bash - - python -m unittest tests/*.py - -5) 切换到 ``examples/start_here`` 文件夹运行一些范例。 - diff --git a/docs/docs_zh/sources/source/nlp/asr-improvement.rst b/docs/docs_zh/sources/source/nlp/asr-improvement.rst index 3673fe7de35d..5382215d98dd 100644 --- a/docs/docs_zh/sources/source/nlp/asr-improvement.rst +++ b/docs/docs_zh/sources/source/nlp/asr-improvement.rst @@ -24,21 +24,21 @@ Librispeech 训练数据集包含三个部分: train-clean-100, train-clean-360, 从预训练 BERT 模型中加载参数 ---------------------------- -编码器和解码器用的都是预训练的 BERT 模型参数。 因为 BERT 的语言模型和 Transformer 的编码器结构相同,因此没有其他什么需要做的。从预训练的 BERT 模型中为解码器准备参数,我们写了一个脚本 ``get_decoder_params_from_bert.py`` 会从 ``pytorch-transformers`` :cite:`asr-imps-huggingface2019transformers` 下载参数,并把他们映射到解码器的参数上. +编码器和解码器用的都是预训练的 BERT 模型参数。 因为 BERT 的语言模型和 Transformer 的编码器结构相同,因此没有其他什么需要做的。从预训练的 BERT 模型中为解码器准备参数,我们写了一个脚本 ``get_decoder_params_from_bert.py`` 会从 ``transformers`` 仓库 :cite:`asr-imps-huggingface2019transformers` 下载参数,并把他们映射到解码器的参数上。 编码器和解码器的注意力是用 self-attention 参数做初始化的。 -这个脚本位于 ``scripts`` 文件目录下,接受两个参数: +这个脚本位于 ``examples/nlp/asr_postprocessor/get_decoder_params_from_bert.py`` 文件目录下,接受两个参数: * ``--model_name``: 模型名称,可选择 ``bert-base-cased``, ``bert-base-uncased`` 等参数。 * ``--save_to``: 指定保存目录 .. code-block:: bash - $ python get_decoder_params_from_bert.py --model_name bert-base-uncased + $ python get_decoder_params_from_bert.py --model_name bert-base-uncased --save_to results_dir 神经模块概览 ------------ -首先,因为所有的模块都是由NeMo构建的,我们需要初始化 ``NeuralModuleFactory`` ,我们需要定义 1) backend (目前只支持PyTorch),2) 混精度优化等级,3) GPU的loca rank以及,4) 一个实验管理器,创建一个时间戳的文件夹来存储 checkpoints 和相关的输出,日志文件以及 TensorBoard 的图。 +首先,因为所有的模块都是由NeMo构建的,我们需要初始化 ``NeuralModuleFactory`` ,我们需要定义 1) 后端(backend) (PyTorch),2) 混精度优化等级,3) GPU的loca rank以及,4) 一个实验管理器,创建一个时间戳的文件夹来存储 checkpoints 和相关的输出,日志文件以及 TensorBoard 的图。 .. code-block:: python @@ -54,14 +54,14 @@ Librispeech 训练数据集包含三个部分: train-clean-100, train-clean-360, .. code-block:: python - tokenizer = NemoBertTokenizer(pretrained_model="bert-base-uncased") + tokenizer = nemo_nlp.data.NemoBertTokenizer(pretrained_model="bert-base-uncased") -编码器模块对应于 BERT 的语言模型,它来自于 ``nemo_nlp.huggingface`` 模块: +编码器模块对应于 BERT 的语言模型,它来自于 ``nemo_nlp.nm.trainables.huggingface`` 模块: .. code-block:: python zeros_transform = nemo.backends.pytorch.common.ZerosLikeNM() - encoder = nemo_nlp.huggingface.BERT( + encoder = nemo_nlp.nm.trainables.huggingface.BERT( pretrained_model_name=args.pretrained_model, local_rank=args.local_rank) @@ -87,7 +87,7 @@ Librispeech 训练数据集包含三个部分: train-clean-100, train-clean-360, .. code-block:: python - decoder = nemo_nlp.TransformerDecoderNM( + decoder = nemo_nlp.nm.trainables.TransformerDecoderNM( d_model=args.d_model, d_inner=args.d_inner, num_layers=args.num_layers, @@ -109,16 +109,16 @@ Librispeech 训练数据集包含三个部分: train-clean-100, train-clean-360, 模型训练 -------- -训练模型,运行 ``asr_postprocessor.py.py`` ,它位于 ``examples/nlp`` 目录中。我们用 novograd 优化器来训练 :cite:`asr-imps-ginsburg2019stochastic`, 设置学习率 ``lr=0.001`` ,多项式学习率衰减策略, ``1000`` 步预热, 每个GPU的 batch size 为 ``4096*8`` 个符号, 以及 ``0.25`` dropout 概率。我们在8块GPU上做训练,可以用下面的方法开启多GPU训练模式: +训练模型,运行 ``asr_postprocessor.py.py`` ,它位于 ``examples/nlp/asr_postprocessor`` 目录中。我们用 novograd 优化器来训练 :cite:`asr-imps-ginsburg2019stochastic`, 设置学习率 ``lr=0.001`` ,多项式学习率衰减策略, ``1000`` 步预热, 每个GPU的 batch size 为 ``4096*8`` 个符号, 以及 ``0.25`` dropout 概率。我们在8块GPU上做训练,可以用下面的方法开启多GPU训练模式: .. 
code-block:: bash - $ python -m torch.distributed.launch --nproc_per_node=8 asr_postprocessor.py --data_dir ../../tests/data/pred_real/ --restore_from ../../scripts/bert-base-uncased_decoder.pt + $ python -m torch.distributed.launch --nproc_per_node=8 asr_postprocessor.py --data_dir data_dir --restore_from bert-base-uncased_decoder.pt 参考 ---- -.. bibliography:: nlp_all.bib +.. bibliography:: nlp_all_refs.bib :style: plain :labelprefix: ASR-IMPROVEMENTS :keyprefix: asr-imps- diff --git a/docs/docs_zh/sources/source/nlp/bert_pretraining.rst b/docs/docs_zh/sources/source/nlp/bert_pretraining.rst index 24d3a556020b..98cd950db87c 100644 --- a/docs/docs_zh/sources/source/nlp/bert_pretraining.rst +++ b/docs/docs_zh/sources/source/nlp/bert_pretraining.rst @@ -1,77 +1,99 @@ -BERT预训练 -========== +BERT 预训练 +============ -在本教程中,我们会按照BERT模型结构 :cite:`nlp-bert-devlin2018bert` 构建并训练一个掩码语言模型。训练可以完全从零开始或者在一个预训练好的模型基础上继续训练。在开始本教程之前,请先安装好 ``nemo`` 和 ``nemo_nlp`` 。关于安装 ``nemo`` 的一些步骤可以参阅 :ref:`installation` 章节。 +在本教程中,我们会按照 BERT 模型结构 :cite:`nlp-bert-devlin2018bert` 构建并训练一个掩码语言模型。训练可以完全从零开始或者在一个预训练好的模型基础上继续训练。在开始本教程之前,请先安装好 ``nemo`` 和 ``nemo_nlp`` 。关于安装 ``nemo`` 的一些步骤可以参阅 :ref:`installation` 章节。 -创建一个专门领域的BERT模型对于某些应用是更有优势的。比如一个专门针对生物医学领域的专业BERT,类似于BioBERT :cite:`nlp-bert-lee2019biobert` 和SciBERT :cite:`nlp-bert-beltagy2019scibert` 。 +创建一个专门领域的BERT模型对于某些应用是更有优势的。比如一个专门针对生物医学领域的专业 BERT ,类似于 BioBERT :cite:`nlp-bert-lee2019biobert` 和 SciBERT :cite:`nlp-bert-beltagy2019scibert` 。 -本教程中所使用的代码来自于 ``examples/nlp/language_modeling/bert_pretraining.py``. +本教程中所使用的代码来自于 `examples/nlp/language_modeling/bert_pretraining.py`. + +.. tip:: + 我们提供了几个 BERT 预训练模型,您可以直接使用: + `bert large uncased for nemo `__ + `bert base uncased for nemo `__ + `bert base cased for nemo `__ + +简介 +------------ + +创建领域相关的 BERT 模型对于很多应用都有溢。一个显著的领域相关的 BERT 模型的例子是生物医学的场景下, +比如 BioBERT :cite:`nlp-bert-lee2019biobert` 和 SciBERT :cite:`nlp-bert-beltagy2019scibert`. + +.. _bert_data_download: 语料下载 -------- -因为这只是一个演示,所以我们使用一个非常小的英文数据集 WikiText-2 :cite:`nlp-bert-merity2016pointer` 。 +训练语料可以是原始的文本数据,也可以是预处理过的数据。如果是原始文本,我们需要在训练的过程中进行文本处理。接下来,我们会分别说明这两种情况。 +首先我们演示如何在原始文本数据上做训练。我们使用一个非常小的英文数据集 WikiText-2 :cite:`nlp-bert-merity2016pointer` 。 -运行脚本 ``examples/nlp/scripts/get_wt2.sh`` 便可以下载这个数据集。下载后并解压,会得到如下三个文件: +运行脚本 `examples/nlp/language_modeling/get_wkt2.sh ` 便可以下载这个数据集。下载后并解压,会得到如下三个文件: - .. code-block:: bash +.. code-block:: bash - test.txt - train.txt - valid.txt + test.txt + train.txt + valid.txt -如果想尝试训练中文BERT模型,你可以下载中文维基语料 wiki2019zh_。下载后,你需要解压并用这个脚本 ``examples/nlp/scripts/process_wiki_zh.py`` 来进行预处理 +如果想尝试训练中文 BERT 模型,你可以下载中文维基语料 wiki2019zh_ 。下载后,你需要解压并用这个脚本 `examples/nlp/language_modeling/process_wiki_zh.py` 来进行预处理 .. _wiki2019zh: https://github.com/brightmart/nlp_chinese_corpus - .. code-block:: bash +.. 
code-block:: bash + + python examples/nlp/scripts/process_wiki_zh.py --data_dir=./wiki_zh --output_dir=./wiki_zh --min_frequency=3 + + +你也可以选择已经预处理好的数据进行训练。我们使用 BERT 论文中提及的维基百科和 BookCorpus 数据集。 + +想要下载数据集,前往 `这个网址 `__ +然后运行脚本文件 `./data/create_datasets_from_start.sh` 。 +如果顺利的话,你会得到两个名字类似于这样 `lower_case_[0,1]_seq_len_128_max_pred_20_masked_lm_prob_0.15_random_seed_12345_dupe_factor_5` +和 `lower_case_[0,1]_seq_len_512_max_pred_80_masked_lm_prob_0.15_random_seed_12345_dupe_factor_5` 的文件夹。他们分别包含序列长度为128和512的数据。 - python examples/nlp/scripts/process_wiki_zh.py --data_dir=./wiki_zh --output_dir=./wiki_zh --min_frequency=3 创建分词器(Tokenizer) --------------------- -首先你需要创建一个 `BERTPretrainingDataDesc` 对象来描述数据集的格式。这其中涉及的主要步骤包括将数据集符号化并创建词表(vocabulary)和一个分词器(tokenizer). -你也可以使用一个现成的词表或者分词器模型来跳过这一步。如果你已经有一个预训练好的分词器模型,将它复制到文件夹 ``[data_dir]/bert`` 下并重命名为 ``tokenizer.model`` 。 +对于原始文本数据,你需要一个分词器来进行处理。 +首先你需要创建一个 `BERTPretrainingDataDesc` 对象来描述数据集的格式。这其中涉及的主要步骤包括将数据集符号化并创建词表 (vocabulary) 和一个分词器 (tokenizer) . -如果你有一个现成的词表文件,可以将它复制到文件夹 ``[data_dir]/bert`` 下并命名为 ``vocab.txt`` 。 +你也可以使用一个现成的词表或者分词器模型来跳过这一步。如果你已经有一个预训练好的分词器模型,将它复制到文件夹 `[data_dir]/bert` 下并重命名为 `tokenizer.model` 。 - .. code-block:: python +如果你有一个现成的词表文件,可以将它复制到文件夹 `[data_dir]/bert` 下并命名为 `vocab.txt` 。 + +.. code-block:: python - data_desc = BERTPretrainingDataDesc(args.dataset_name, - args.data_dir, - args.vocab_size, - args.sample_size, - special_tokens, - 'train.txt') + import nemo.collections.nlp as nemo_nlp + + data_desc = nemo_nlp.data.BERTPretrainingDataDesc( + dataset_name=args.dataset_name, + train_data=args.data_dir, + vocab_size=args.vocab_size, + sample_size=args.sample_size, + special_tokens=special_tokens) 接下来我们需要定义tokenizer。如果你想使用一个自定义的词表文件,我们强烈推荐使用 `SentencePieceTokenizer` 。如果要训练中文BERT模型,请使用 `NemoBertTokenizer` 。 - .. code-block:: python +.. code-block:: python + + # If you're using a custom vocabulary, create your tokenizer like this + tokenizer = nemo_nlp.data.SentencePieceTokenizer(model_path="tokenizer.model") + special_tokens = nemo_nlp.data.get_bert_special_tokens('bert') + tokenizer.add_special_tokens(special_tokens) + + # Otherwise, create your tokenizer like this + tokenizer = nemo_nlp.data.NemoBertTokenizer(pretrained_model="bert-base-uncased") + # or + tokenizer = nemo_nlp.data.NemoBertTokenizer(vocab_file="vocab.txt") - # If you're using a custom vocabulary, create your tokenizer like this - tokenizer = SentencePieceTokenizer(model_path="tokenizer.model") - special_tokens = { - "sep_token": "[SEP]", - "pad_token": "[PAD]", - "bos_token": "[CLS]", - "mask_token": "[MASK]", - "eos_token": "[SEP]", - "cls_token": "[CLS]", - } - tokenizer.add_special_tokens(special_tokens) - - # Otherwise, create your tokenizer like this - tokenizer = NemoBertTokenizer(vocab_file="vocab.txt") - # or - tokenizer = NemoBertTokenizer(pretrained_model="bert-base-uncased") 创建模型 -------- .. tip:: - 建议你在一个Jupyter notebook中尝试以下内容,以方便调试。 + 建议你在一个 Jupyter notebook 中尝试以下内容,以方便调试。 首先,我们需要创建一个 `NeuralModuleFactory` 对象并调用所支持的后端。具体如何创建还取决于你是否想进行多GPU训练或者混合精度训练等。在本教程中,我们只使用一个GPU,而且没有混合精度训练。如果你想使用混合精度训练,可以将 ``amp_opt_level`` 选项设置为 ``O1`` 或者 ``O2`` 。 @@ -97,92 +119,150 @@ BERT预训练 max_position_embeddings=args.max_seq_length, hidden_act=args.hidden_act) -如果你想从一个已有的BERT模型文件继续训练,那设置一个模型的名字即可。如果想查看完整的预训练好的BERT模型列表,可以使用 `nemo_nlp.huggingface.BERT.list_pretrained_models()` 。 +如果想从一个已有的模型开始训练,你可以指定选项 `--load_dir` 和类似于下面这样的代码: - .. code-block:: python +.. 
code-block:: python - bert_model = nemo_nlp.nm.trainables.huggingface.BERT(pretrained_model_name="bert-base-cased") + ckpt_callback = nemo.core.CheckpointCallback(folder=nf.checkpoint_dir, + load_from_folder=args.load_dir) -接下来,我们需要定义分类器和损失函数。在本教程中,我们会同时使用掩码语言模型和预测下一句模型这两个模型的损失函数,如果你只用掩饰语言模型作为损失的话,可能会观察到更高的准确率。 +如果你想从一个已有的 BERT 模型文件继续训练,那设置一个模型的名字即可。如果想查看完整的预训练好的 BERT 模型列表,可以使用 `nemo_nlp.huggingface.BERT.list_pretrained_models()` 。 .. code-block:: python - mlm_classifier = nemo_nlp.nm.trainables.TokenClassifier(args.d_model, - num_classes=tokenizer.vocab_size, - num_layers=1, - log_softmax=True) - mlm_loss_fn = nemo_nlp.nm.losses.MaskedLanguageModelingLossNM() - - nsp_classifier = nemo_nlp.nm.trainables.SequenceClassifier(args.d_model, - num_classes=2, - num_layers=2, - log_softmax=True) - nsp_loss_fn = nemo.backends.pytorch.common.CrossEntropyLoss() - - bert_loss = nemo_nlp.nm.losses.LossAggregatorNM(num_inputs=2) + bert_model = nemo_nlp.nm.trainables.huggingface.BERT(pretrained_model_name="bert-base-cased") -然后,我们把从输入到输出的整个计算流程封装成一个函数。有了这个函数,我们就可以很方便的分别创建训练流和评估流: +接下来,我们需要定义分类器和损失函数。在本教程中,我们会同时使用掩码语言模型和预测下一句模型这两个模型的损失函数,如果你只用掩饰语言模型作为损失的话,可能会观察到更高的准确率。 .. code-block:: python - def create_pipeline(**args): - data_layer = nemo_nlp.nm.data_layers.BertPretrainingDataLayer( - tokenizer, - data_file, - max_seq_length, - mask_probability, - short_seq_prob, - batch_size) - # for preprocessed data - # data_layer = nemo_nlp.BertPretrainingPreprocessedDataLayer( - # data_file, - # max_predictions_per_seq, - # batch_size, is_training) + mlm_classifier = nemo_nlp.nm.trainables.BertTokenClassifier( + args.hidden_size, + num_classes=args.vocab_size, + activation=ACT2FN[args.hidden_act], + log_softmax=True) - steps_per_epoch = len(data_layer) // (batch_size * args.num_gpus * args.batches_per_step) + mlm_loss_fn = nemo_nlp.nm.losses.SmoothedCrossEntropyLoss() - input_data = data_layer() + nsp_classifier = nemo_nlp.nm.trainables.SequenceClassifier( + args.hidden_size, + num_classes=2, + num_layers=2, + activation='tanh', + log_softmax=False) - hidden_states = bert_model(input_ids=input_data.input_ids, - token_type_ids=input_data.input_type_ids, - attention_mask=input_data.input_mask) + nsp_loss_fn = nemo.backends.pytorch.common.CrossEntropyLossNM() - mlm_logits = mlm_classifier(hidden_states=hidden_states) - mlm_loss = mlm_loss_fn(logits=mlm_logits, - output_ids=input_data.output_ids, - output_mask=input_data.output_mask) + bert_loss = nemo.backends.pytorch.common.losses.LossAggregatorNM(num_inputs=2) - nsp_logits = nsp_classifier(hidden_states=hidden_states) - nsp_loss = nsp_loss_fn(logits=nsp_logits, labels=input_data.labels) - - loss = bert_loss(loss_1=mlm_loss, loss_2=nsp_loss) - - return loss, mlm_loss, nsp_loss, steps_per_epoch +之后,我们将 encoder embedding 层的权重与 MLM 输出层绑定: + .. 
code-block:: python - train_loss, _, _, steps_per_epoch = create_pipeline( - data_file=data_desc.train_file, - preprocessed_data=False, - max_seq_length=args.max_seq_length, - mask_probability=args.mask_probability, - short_seq_prob=args.short_seq_prob, - batch_size=args.batch_size, - batches_per_step=args.batches_per_step) + mlm_classifier.tie_weights_with( + bert_model, + weight_names=["mlp.last_linear_layer.weight"], + name2name_and_transform={ + "mlp.last_linear_layer.weight": ("bert.embeddings.word_embeddings.weight", nemo.core.WeightShareTransform.SAME) + }, + ) + +然后,我们把从输入到输出的整个计算流程封装成一个函数。有了这个函数,我们就可以很方便的分别创建训练计算图和评估计算图。 + +如果用的是原始文本数据,则选择 `nemo_nlp.nm.data_layers.BertPretrainingDataLayer` 。如果是预处理好的数据,则选择 `nemo_nlp.nm.data_layers.BertPretrainingPreprocessedDataLayer` + +.. code-block:: python + + def create_pipeline(**args): + data_layer = nemo_nlp.nm.data_layers.BertPretrainingDataLayer( + tokenizer, + data_file, + max_seq_length, + mask_probability, + short_seq_prob, + batch_size) + # for preprocessed data + # data_layer = nemo_nlp.BertPretrainingPreprocessedDataLayer( + # data_file, + # max_predictions_per_seq, + # batch_size, + # mode) + + steps_per_epoch = len(data_layer) // (batch_size * args.num_gpus * args.batches_per_step) + + input_data = data_layer() + + hidden_states = bert_model(input_ids=input_data.input_ids, + token_type_ids=input_data.input_type_ids, + attention_mask=input_data.input_mask) + + mlm_logits = mlm_classifier(hidden_states=hidden_states) + mlm_loss = mlm_loss_fn(logits=mlm_logits, + labels=input_data.output_ids, + output_mask=input_data.output_mask) + + nsp_logits = nsp_classifier(hidden_states=hidden_states) + nsp_loss = nsp_loss_fn(logits=nsp_logits, labels=input_data.labels) + + loss = bert_loss(loss_1=mlm_loss, loss_2=nsp_loss) + + return loss, mlm_loss, nsp_loss, steps_per_epoch + + + train_loss, _, _, steps_per_epoch = create_pipeline( + data_file=data_desc.train_file, + preprocessed_data=False, + max_seq_length=args.max_seq_length, + mask_probability=args.mask_probability, + short_seq_prob=args.short_seq_prob, + batch_size=args.batch_size, + batches_per_step=args.batches_per_step, + mode="train") + + # for preprocessed data + # train_loss, _, _, steps_per_epoch = create_pipeline( + # data_file=args.train_data, + # preprocessed_data=True, + # max_predictions_per_seq=args.max_predictions_per_seq, + # batch_size=args.batch_size, + # batches_per_step=args.batches_per_step, + # mode="train") + + eval_loss, _, _, _ = create_pipeline( + data_file=data_desc.eval_file, + preprocessed_data=False, + max_seq_length=args.max_seq_length, + mask_probability=args.mask_probability, + short_seq_prob=args.short_seq_prob, + batch_size=args.batch_size, + batches_per_step=args.batches_per_step, + mode="eval") + + # for preprocessed data + # eval_loss, eval_mlm_loss, eval_nsp_loss, _ = create_pipeline( + # data_file=args.eval_data, + # preprocessed_data=True, + # max_predictions_per_seq=args.max_predictions_per_seq, + # batch_size=args.batch_size, + # batches_per_step=args.batches_per_step, + # mode="eval") + + +运行 +---- - # for preprocessed data - # train_loss, _, _, steps_per_epoch = create_pipeline( - # data_file=args.data_dir, - # preprocessed_data=True, - # max_predictions_per_seq=args.max_predictions_per_seq, - # training=True, - # batch_size=args.batch_size, - # batches_per_step=args.batches_per_step) +接着定义学习率: - eval_loss, eval_tensors, _ = create_pipeline(data_desc.eval_file, - args.max_seq_length, - + .. 
code-block:: python + lr_policy_fn = get_lr_policy(args.lr_policy, + total_steps=args.num_iters, + warmup_ratio=args.lr_warmup_proportion) + # if you are training on raw text data, you have use the alternative to set the number of training epochs + lr_policy_fn = get_lr_policy(args.lr_policy, + total_steps=args.num_epochs * steps_per_epoch, + warmup_ratio=args.lr_warmup_proportion) 再然后,我们定义一些必要的回调函数: @@ -192,23 +272,20 @@ BERT预训练 .. code-block:: python - train_callback = nemo.core.SimpleLossLoggerCallback(...) - eval_callback = nemo.core.EvaluatorCallback(...) - ckpt_callback = nemo.core.CheckpointCallback(...) - -.. tip:: - - Tensorboard_ 是一个非常棒的调试工具。虽然不是训练的必要步骤,但是你可以安装 tensorboardX_ 并在训练过程中运行它来观察一些指标在训练过程中的变化: - - .. code-block:: bash - - tensorboard --logdir bert_pretraining_tb - -.. _Tensorboard: https://www.tensorflow.org/tensorboard -.. _tensorboardX: https://github.com/lanpa/tensorboardX + train_callback = nemo.core.SimpleLossLoggerCallback(tensors=[train_loss], + print_func=lambda x: logging.info("Loss: {:.3f}".format(x[0].item())))), + step_freq=args.train_step_freq, + eval_callback = nemo.core.EvaluatorCallback(eval_tensors=[eval_loss], + user_iter_callback=nemo_nlp.callbacks.lm_bert_callback.eval_iter_callback, + user_epochs_done_callback=nemo_nlp.callbacks.lm_bert_callback.eval_epochs_done_callback + eval_step=args.eval_step_freq) + ckpt_callback = nemo.core.CheckpointCallback(folder=nf.checkpoint_dir, + epoch_freq=args.save_epoch_freq, + load_from_folder=args.load_dir, + step_freq=args.save_step_freq) -我们还建议把模型参数保存到一个配置文件中。这样做的话,你以后使用NeMo的时候导入BERT模型会非常方便。 +我们还建议把模型参数保存到一个配置文件中。这样做的话,你以后使用 NeMo 的时候导入 BERT 模型会非常方便。 .. code-block:: python @@ -221,10 +298,6 @@ BERT预训练 .. code-block:: python - lr_policy_fn = get_lr_policy(args.lr_policy, - total_steps=args.num_epochs * steps_per_epoch, - warmup_ratio=args.lr_warmup_proportion) - nf.train(tensors_to_optimize=[train_loss], lr_policy=lr_policy_fn, callbacks=[train_callback, eval_callback, ckpt_callback], @@ -232,12 +305,60 @@ BERT预训练 optimization_params={"batch_size": args.batch_size, "num_epochs": args.num_epochs, "lr": args.lr, + "betas": (args.beta1, args.beta2), "weight_decay": args.weight_decay}) +如何使用样例中的训练脚本 +------------------------ + +完整的 BERT 模型训练脚本保存在这个文件中: `examples/nlp/language_modeling/bert_pretraining.py` + +如果想进行单个 GPU 的训练,可以运行这个命令: + +.. code-block:: bash + + cd examples/nlp/language_modeling + python bert_pretraining.py [args] + + +如果想进行多 GPU 训练,可以运行: + +.. code-block:: bash + + cd examples/nlp/language_modeling + python -m torch.distributed.launch --nproc_per_node=x bert_pretraining.py --num_gpus=x [args] + +如果使用的是原始的文本数据,请在命令行中添加选项 ``data_text`` + +.. code-block:: bash + + python bert_pretraining.py [args] data_text [args] + +如果使用的是预处理过的数据(默认配置),请使用 ``data_preprocessed`` + +.. code-block:: bash + + python bert_pretraining.py [args] data_preprocessed [args] + +.. note:: + 关于下载和预处理数据,请参阅 :ref:`bert_data_download` + +.. tip:: + + Tensorboard_ 是一个非常棒的调试工具。虽然不是训练的必要步骤,但是你可以安装 tensorboardX_ 并在训练过程中运行它来观察一些指标在训练过程中的变化: + + .. code-block:: bash + + tensorboard --logdir bert_pretraining_tb + +.. _Tensorboard: https://www.tensorflow.org/tensorboard +.. _tensorboardX: https://github.com/lanpa/tensorboardX + + 参考 ---- -.. bibliography:: nlp_all.bib +.. 
bibliography:: nlp_all_refs.bib :style: plain :labelprefix: NLP-BERT-PRETRAINING :keyprefix: nlp-bert- diff --git a/docs/docs_zh/sources/source/nlp/dialogue_state_tracking_trade.rst b/docs/docs_zh/sources/source/nlp/dialogue_state_tracking_trade.rst new file mode 100644 index 000000000000..368783b394fe --- /dev/null +++ b/docs/docs_zh/sources/source/nlp/dialogue_state_tracking_trade.rst @@ -0,0 +1,243 @@ +教程 +======== + + +简介 +----- + +**对话状态追踪 (DST)** :cite:`nlp-dst-henderson2015machine` \ +的目标是要为正在进行的对话的状态构建一个表示(representation) \ +对话是一系列的对话参与者之间的语句。\ +换句话说,DST 系统的目标是能捕捉到用户的目标和意图 \ +然后把它们编码成一系列的**槽(slots)**和槽对应的**值(values)**。 + + +.. figure:: dst_multiwoz_example.png + + Fig. 1: 一个例子, 多领域对话和相关的状态追踪 (来源: \ + :cite:`nlp-dst-wu2019transferable`) + + +在这个教程中我们关注多领域对话数据集 MultiWOZ :cite:`nlp-dst-budzianowski2018multiwoz` \ +展示如何构建一个 TRADE 模型 :cite:`nlp-dst-wu2019transferable`, \ +一个最近发表的领域先进的模型 \ +**多领域(Multi-domain)** 场景会引入一些挑战, 最重要的来自于需要 \ +**多轮映射(multi-turn mapping)**。在一个 **单轮映射(single-turn mapping)** 场景,(**领域(domain)**, **槽(slot)**, **值(value)**) 三元组可以从 \ +单轮中就能推断出。在多轮对话中,这个假设并不存在,DST 系统必须能够从多轮中 \ +推断出, 这些信息有可能是横跨多个不同的领域的。 + + + + +MultiWOZ 数据集 +-------------------- + +多领域数据集 Wizard-of-Oz (`MultiWOZ`_) 是一个囊括了 \ +7个领域包含超过10,000个对话的人-人数据集。 +原先的 MultiWOZ 2.0 数据集是这篇文章引入的 :cite:`nlp-dst-budzianowski2018multiwoz`. +然而,在这个教程中我们用数据集 MultiWOZ 2.1 :cite:`nlp-dst-eric2019multiwoz`, 它 MultiWOZ 2.0 的升级版。它和原先的数据集有固定的一些问题像是状态的错误,语句错误,值规范化的问题等)。我们的模型也可以在 MultiWOZ 2.0 上训练。 + +.. _MultiWOZ: https://www.repository.cam.ac.uk/handle/1810/294507 + +数据集包含下面这些领域: + 1. restaurant + 2. hotel + 3. attraction + 4. taxi + 5. train + 6. hospital + 7. police. + +和下面这些槽: + * inform (∗) + * address (∗) + * postcode (∗) + * phone (∗) + * name (1234) + * no of choices (1235) + * area (123) + * pricerange (123) + * type (123) + * internet (2) + * parking (2) + * stars (2) + * open hours (3) + * departure (45) + * destination (45) + * leave after (45) + * arrive by (45) + * no of people (1235) + * reference no. (1235) + * trainID (5) + * ticket price (5) + * travel time (5) + * department (7) + * day (1235) + * no of days (123). + + +请注意,一些动作(actions)和槽只和特定领域有关,但一些是全部通用的, \ +比如,领域无关的。后者用(∗)表示。 + + +MultiWOZ 数据集有 10,438 个对话,总共 115,434 轮。 \ +对话通常分成单和多领域对话。 \ +对话长度分布从 1 到 31,大约 70% 对话有超过 10 轮。 \ +平均轮数是对单领域和多领域分别为 8.93 和 15.39。 \ + +每个对话包括一个目标,多个用户和系统语句以及一个信念状态(belief state)和每轮的对话操作(action)以及相应的槽 \ +另外,每个对话都有一个任务描述。 \ +而且,它包含了系统和用户对话操作(act)的标注 (后者在 MultiWOZ 2.1 中引入). + + +TRADE 模型 +--------------- + +**TRA**\nsferable **D**\ialogue stat\ **E** generator (TRADE) :cite:`nlp-dst-wu2019transferable` 是为 \ +多领域面向任务的对话状态追踪问题 +特别设计的模型 \ +模型从语句和历史中生成对话状态。它为领域和槽学习嵌入(embeddings)并且 \ +受益于拷贝机制(copy mechanism)从而能够促进领域之间的知识转移。它使得模型能够预测 \ +在给定领域中,训练过程中从未见过的(**领域(domain)**, **槽(slot)**, **值(value)**)三元组。 + + +.. figure:: dst_trade_architecture.png + + Fig. 2: TRADE 模型的架构 (来源: :cite:`nlp-dst-wu2019transferable`) + +模型由三个主要部分组成: + + * 一个 **语句编码器(utterance encoder)**, + * 一个 **槽栅(slot gate)**,以及 + * 一个 **状态生成器(state generator)**。 + +**语句编码器(utterance encoder)** 是一个双向 Gated Recurrent Unit (GRU), 返回上下文单词以及 \ +一个编码了整个对话历史的上下文向量。 + +**状态生成器(state generator)** 也用了 GRU 来预测(domain, slot)对的值。生成器用了一个 soft-gated \ +pointer-generator copying,把 **词表上的分布** 和 **对话历史上的分布** +合成一个单独的输出分布。 + +最后,**槽栅(slot gate)** 是个简单的分类器,把编码器隐状态的上下文向量 \ +映射到三个类上的概率分布: *ptr*, *none*, 和 *dontcare*. + +数据预处理 +------------------- + +首先,你需要从 `MultiWOZ2.1`_ 项目网站上下载 `MULTIWOZ2.1.zip` 。它包含了 \ +MultiWOZ 2.1 数据集。或者,你可以从 `MultiWOZ2.0`_ 上下载压缩文件 `MULTIWOZ2.zip` \ +它包含了这个数据集的老版本。 + +.. 
_MultiWOZ2.1: https://www.repository.cam.ac.uk/handle/1810/294507 + +.. _MultiWOZ2.0: https://www.repository.cam.ac.uk/handle/1810/280608 + +接着我们需要预处理,重新格式化我们的数据集,这会将数据集分成三个分布: + + * traininig split ( ``train_dials.json`` 文件包含了8242个对话) + * validation split ( ``val_dials.json`` 文件包含了1000个对话) + * test split (``test_dials.json`` 文件包含了999个对话) + +你可以用提供好的 `process_multiwoz.py`_ 脚本 +预处理 MultiWOZ 数据集: + +.. _process_multiwoz.py: https://github.com/NVIDIA/NeMo/tree/master/examples/nlp/dialogue_state_tracking/multiwoz/process_multiwoz.py + +.. code-block:: bash + + cd examples/nlp/dialogue_state_tracking/multiwoz + python process_multiwoz.py + +.. note:: + 默认情况下,脚本假设你会把数据拷贝以及解压到 \ + ``~/data/state_tracking/multiwoz2.1/`` \ + 目录下,并且它会把结果存到 ``~/data/state_tracking/multiwoz2.1`` 文件夹下 \ + 你可以在命令行中传入参数 ``source_data_dir`` 和 ``target_data_dir`` \ + 来修改。MultiWOZ 2.0 和 MultiWOZ 2.1 可以用相同的脚本处理。 + + +构建 NeMo 图 +----------------------- + +NeMo 训练图包括六个模块包括数据层,编码器,解码器和损失函数: + + * data_layer (:class:`nemo.collection.nlp.nm.data_layers.MultiWOZDataLayer`) + * encoder (:class:`nemo.backends.pytorch.common.EncoderRNN`) + * decoder (:class:`nemo.collection.nlp.nm.trainables.TRADEGenerator`) + * gate_loss_fn (:class:`nemo.backends.pytorch.common.losses.CrossEntropyLossNM`) + * ptr_loss_fn (:class:`nemo.collections.nlp.nm.losses.MaskedLogLoss`) + * total_loss_fn (:class:`nemo.collection.nlp.nm.losses.LossAggregatorNM`) + +训练 +-------- + +想要在数据集 MultiWOZ 2.1 上训练 TRADE 模型的实例,并且在它的测试数据集上进行评估,只需要 \ +用默认参数运行 `dialogue_state_tracking_trade.py`_ : + +.. _dialogue_state_tracking_trade.py: https://github.com/NVIDIA/NeMo/tree/master/examples/nlp/dialogue_state_tracking/dialogue_state_tracking_trade.py + + +.. code-block:: bash + + cd examples/nlp/dialogue_state_tracking + python dialogue_state_tracking_trade.py + + +.. note:: + 同样地,这个脚本会默认读取 ``~/data/state_tracking/multiwoz2.1`` 文件夹. 
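The soft-gated pointer-generator copying used by the state generator in the TRADE overview above can be summarized in a few lines. This is only an illustrative sketch of the idea, not NeMo's ``TRADEGenerator``; the tensor names and shapes are assumptions made for the example.

.. code-block:: python

    import torch

    def soft_gated_copy(p_vocab, history_attn, history_token_ids, p_gen):
        # p_vocab:           (batch, vocab_size) distribution over the vocabulary
        # history_attn:      (batch, hist_len)   attention over dialogue-history tokens
        # history_token_ids: (batch, hist_len)   vocabulary ids of those tokens
        # p_gen:             (batch, 1)          gate: probability of generating vs. copying
        p_copy = torch.zeros_like(p_vocab)
        # scatter the attention mass onto the vocabulary positions of the history tokens
        p_copy.scatter_add_(1, history_token_ids, history_attn)
        # blend the two distributions into a single output distribution
        return p_gen * p_vocab + (1.0 - p_gen) * p_copy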
+ 这个路径可以用 ``data_dir`` 覆盖。 + + + +指标和结果 +------------------- + +在下面的表格中我们比较了我们实现的 TRADE 模型结果和 \ +原始论文 :cite:`nlp-dst-wu2019transferable` 中的结果。在作者们回复 MultiWOZ 2.0 +数据集的结果时候, 我们跑了在 MultiWOZ 2.1 数据集上的原始实现,也记录了这些结果。 + +我们用了和原始实现中相同的参数。在我们的实现和原始的视线中有些区别。\ +主要的区别是我们的模型没有用预训练的词嵌入,似乎是会影响模型的效果的。 \ +另一个区别是我们在学习策略的时候用了 SquareAnnealing 而不是 \ +固定的学习率。另外,我们是根据训练集创建的词表,而原始实现 \ +是根据所有数据集包括测试和验证集创建的。我们模型的准确率的主要提升是 \ +用了更好的学习率策略。当我们用固定的学习率 \ +我们得到了和原始实现中相似的结果。 + +我们再模型实现上也做了一些提升来加快训练。这使得我们的实现比原始的实现快很多 \ +另外, NeMo 支持多 GPU 训练,这使得训练时间更快了。 \ +需要注意的是在用多 GPU 的时候学习率应该调高, \ +因为 batch size 变大了。 + +根据 :cite:`nlp-dst-wu2019transferable`, 我们用两个指标来衡量模型的性能: + + * **联合目标准确率(Joint Goal Accuracy)** 比较了每轮对话中的预测对话状态和真实状态,并且输出只有当输出的**所有的值完全正确** + 才会认为输出是正确的。 + * **槽准确率(Slot Accuracy)** 独立地比较每个(domain, slot, value)三元组和它的真实值。 + + ++---------------------------------------------+--------+--------+--------+--------+--------+--------+--------+--------+ +| | MultiWOZ 2.0 | MultiWOZ 2.1 | ++ +--------+--------+--------+--------+--------+--------+--------+--------+ +| | Test |Development | Test |Development | ++ +--------+--------+--------+--------+--------+--------+--------+--------+ +| TRADE implementations | Goal | Slot | Goal | Slot | Goal | Slot | Goal | Slot | ++=============================================+========+========+========+========+========+========+========+========+ +| Original :cite:`nlp-dst-wu2019transferable` | 48.62% | 96.92% | 48.76% | 96.95% | 45.31% | 96.57% | 49.15% | 97.04% | ++---------------------------------------------+--------+--------+--------+--------+--------+--------+--------+--------+ +| NeMo's Implementation of TRADE | 48.92% | 97.03% | 50.96% | 97.17% | 47.25% | 96.80% | 51.38% | 97.21% | ++---------------------------------------------+--------+--------+--------+--------+--------+--------+--------+--------+ + + +.. note:: + 在训练 TRADE 模型的时候用一个额外的监督信号,强制 Slot Gate 能够恰当的分类 \ + 上下文向量。脚本 `process_multiwoz.py`_ 从数据集中抽取额外的信息, + 脚本 `dialogue_state_tracking_trade.py`_ 也汇报了 **Gating Accuracy**。 + +参考 +------- + +.. bibliography:: nlp_all_refs.bib + :style: plain + :labelprefix: NLP-DST + :keyprefix: nlp-dst- diff --git a/docs/docs_zh/sources/source/nlp/dst_multiwoz_example.png b/docs/docs_zh/sources/source/nlp/dst_multiwoz_example.png new file mode 100644 index 000000000000..6340335c3630 Binary files /dev/null and b/docs/docs_zh/sources/source/nlp/dst_multiwoz_example.png differ diff --git a/docs/docs_zh/sources/source/nlp/dst_trade_architecture.png b/docs/docs_zh/sources/source/nlp/dst_trade_architecture.png new file mode 100644 index 000000000000..cd42faacf60e Binary files /dev/null and b/docs/docs_zh/sources/source/nlp/dst_trade_architecture.png differ diff --git a/docs/docs_zh/sources/source/nlp/intro.rst b/docs/docs_zh/sources/source/nlp/intro.rst index e5803139237b..c0149a03bcc3 100644 --- a/docs/docs_zh/sources/source/nlp/intro.rst +++ b/docs/docs_zh/sources/source/nlp/intro.rst @@ -3,6 +3,21 @@ 自然语言处理 ============ +支持的任务和模型: + +* 意图识别和槽填充 +* 文本分类 +* 基于面向任务的对话系统的状态跟踪 +* 语言模型 +* 神经机器翻译 +* 问答系统 +* 命名实体识别 (NER) +* 标点符号和单词大写 +* GLUE 基准 +* ASR 用 BERT 做后处理 + +所有 NLP 集合下面的例子都在 `这里 `__. + 神经网络机器翻译 (NMT) ---------------------- .. toctree:: @@ -25,6 +40,16 @@ Transformer语言模型 transformer_language_model + +对话状态跟踪 +--------------------- + +.. toctree:: + :maxdepth: 8 + + dialogue_state_tracking_trade + + 命名实体识别 (NER) ------------------ @@ -33,7 +58,17 @@ Transformer语言模型 ner -Intent and Slot filling + +标点符号和单词首字母大写 +----------------------------------- + +.. 
toctree:: + :maxdepth: 8 + + punctuation + + +意图识别和槽填充 ----------------------- .. toctree:: :maxdepth: 8 @@ -41,6 +76,14 @@ Intent and Slot filling joint_intent_slot_filling +问答系统 +----------------------- +.. toctree:: + :maxdepth: 8 + + question_answering + + 用 BERTx2 后处理模型来提升语音识别性能 -------------------------------------- .. toctree:: diff --git a/docs/docs_zh/sources/source/nlp/joint_intent_slot_filling.rst b/docs/docs_zh/sources/source/nlp/joint_intent_slot_filling.rst index 905b2a0400a0..7e36fbbfd745 100644 --- a/docs/docs_zh/sources/source/nlp/joint_intent_slot_filling.rst +++ b/docs/docs_zh/sources/source/nlp/joint_intent_slot_filling.rst @@ -1,9 +1,14 @@ 教程 ==== -在这个教程中,我们将使用 BERT 模型,来实现一个意图识别 (intent classification) 和槽填充 (slot filling) 混合系统,参考自 `BERT for Joint Intent Classification and Slot Filling `_ :cite:`nlp-slot-chen2019bert` 。本教程中所有的代码全部来自 ``examples/nlp/joint_intent_slot_with_bert.py`` 。 +在这个教程中,我们将使用 BERT 模型,来实现一个意图识别 (intent classification) 和槽填充 (slot filling) 混合系统,参考自 `BERT for Joint Intent Classification and Slot Filling `_ :cite:`nlp-slot-chen2019bert` 。本教程中所有的代码全部来自 ``examples/nlp/intent_detection_slot_tagging/joint_intent_slot_with_bert.py``。 + +我们可以使用 `--pretrained_model_name` 这个参数,来选择多个预训练好的 BERT 模型。当前,我们使用的加载预训练模型的脚本均来自 `pytorch_transformers` 。更多预训练好的模型在 `这里下载 `_ 。 + +.. tip:: + + 在 NeMo 中进行BERT的预训练以及预训练好的模型checkpoints可以参见 `BERT pretraining `__ 。 -我们可以使用 `--pretrained_bert_model` 这个参数,来选择四个预训练好的 BERT 模型。当前,我们使用的加载预训练模型的脚本均来自 `pytorch_transformers` 。更多预训练好的模型在 `这里下载 `_ 。 写在开头 -------- @@ -26,181 +31,230 @@ * 输入文件: 一个 `tsv` 文件,第一行为 [sentence][tab][label] * 槽文件: 句子中所有符号串的槽标注,使用空格分隔。槽标注的数量需要与句子中所有符号串的数量保持一致。 -当前,我们提供多个数据集合的预处理脚本,包括: ATIS,可以通过 `Kaggle `_ 进行下载;SNIP对话语言理解数据集,可以通过 `这里 `_ 获取。预处理脚本在 ``collections/nemo_nlp/nemo_nlp/text_data_utils.py`` 。 +当前,我们提供多个数据集合的预处理脚本,包括: ATIS,可以通过 `Kaggle `_ 进行下载;SNIP对话语言理解数据集,可以通过 `这里 `_ 获取。通过把数据集名称这个参数设置成['atis', 'snips-light', 'snips-speak', 'snips-all'],你就可以将其转换成 NeMo 中的格式。 + 代码结构 -------- -首先,我们初始化 ``NeuralModuleFactory`` ,需要定义,1、后端 (PyTorch);2、混合精度优化的级别;3、本地 GPU 的序列号;4、一个实验的管理器,用于创建文件夹来保存相应的 checkpoint、输出、日志文件和 TensorBoard 的图。 +首先,我们初始化 Neural Module Factory,需要定义,1、后端 (PyTorch 或者 Tensorflow);2、混合精度优化的级别;3、本地 GPU 的序列号;4、一个实验的管理器,用于创建文件夹来保存相应的 checkpoint、输出、日志文件和 TensorBoard 的图。 .. code-block:: python nf = nemo.core.NeuralModuleFactory( - backend=nemo.core.Backend.PyTorch, - local_rank=args.local_rank, - optimization_level=args.amp_opt_level, - log_dir=work_dir, - create_tb_writer=True, - files_to_copy=[__file__]) + backend=nemo.core.Backend.PyTorch, + local_rank=args.local_rank, + optimization_level=args.amp_opt_level, + log_dir=work_dir, + create_tb_writer=True, + files_to_copy=[__file__], + add_time_to_log_dir=True, + ) -我们定义分词器,它可以将文本转换成符号串,这里使用来自 `pytorch_transformers` 的内置分词器。其将使用 BERT 模型的映射,把文本转成相应的符号串。 +我们定义分词器,它可以将文本转换成符号串,这里使用来自 `transformers` 的内置分词器。其将使用 BERT 模型的映射,把文本转成相应的符号串。 .. 
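As a concrete illustration of the two-file data layout described in the data-format section above, here is one hypothetical training example, shown as Python strings purely for illustration (the actual label values are whatever the preprocessing script for your dataset emits). The key constraint is that the slot file carries exactly one label per whitespace-separated token of the sentence.

.. code-block:: python

    # Hypothetical line from the tab-separated input file: "sentence<TAB>intent_label"
    tsv_line = "play a song by queen\t3"
    # Matching line from the slot file: one slot label per token, space-separated
    slot_line = "O O O O B-artist"

    sentence = tsv_line.split("\t")[0]
    assert len(sentence.split()) == len(slot_line.split())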
code-block:: python - from pytorch_transformers import BertTokenizer - tokenizer = BertTokenizer.from_pretrained(args.pretrained_bert_model) + tokenizer = nemo_nlp.data.NemoBertTokenizer(pretrained_model=args.pretrained_model_name) 接着,我们定义所有的神经网络模块,加入到意图识别和槽填充混合系统的流程中。 - * 处理数据: `nemo_nlp/nemo_nlp/text_data_utils.py` 中的 `JointIntentSlotDataDesc` 类,用于将源数据处理成 `BertJointIntentSlotDataset` 支持的类型。当前,它支持SNIPS和ATIS两种格式的数据,当你也可以实现预处理脚本,来支持任意格式的数据。 - - JointIntentSlotDataDesc 对象包含例如 `self.train_file`, `self.train_slot_file`, `self.eval_file`, `self.eval_slot_file`, `self.intent_dict_file` 和 `self.slot_dict_file` 等信息。 + * 处理数据: `nemo/collections/nlp/data/datasets/joint_intent_slot_dataset/data_descriptor.py` 中的 `JointIntentSlotDataDesc` 类,用于将源数据处理成 `BertJointIntentSlotDataset` 支持的类型。当前,它支持 SNIPS 和 ATIS 两种格式的数据,当你也可以实现预处理脚本,来支持任意格式的数据。 .. code-block:: python + from nemo.collections.nlp.data.datasets.joint_intent_slot_dataset import JointIntentSlotDataDesc data_desc = JointIntentSlotDataDesc( - args.dataset_name, args.data_dir, args.do_lower_case) + args.data_dir, args.do_lower_case, args.dataset_name, args.none_slot_label, args.pad_label + ) - * 数据集: 将数据转换成 `DataLayerNM` 可以接收的格式。 + * 加载预训练好的 BERT 模型来对相应的输入进行编码。 .. code-block:: python - def get_dataset(data_desc, mode, num_samples): - nemo.logging.info(f"Loading {mode} data...") - data_file = getattr(data_desc, mode + '_file') - slot_file = getattr(data_desc, mode + '_slot_file') - shuffle = args.shuffle_data if mode == 'train' else False - return nemo_nlp.BertJointIntentSlotDataset( - input_file=data_file, - slot_file=slot_file, - pad_label=data_desc.pad_label, - tokenizer=tokenizer, - max_seq_length=args.max_seq_length, - num_samples=num_samples, - shuffle=shuffle) - - train_dataset = get_dataset(data_desc, 'train', args.num_train_samples) - eval_dataset = get_dataset(data_desc, 'eval', args.num_eval_samples) + pretrained_bert_model = nemo_nlp.nm.trainables.get_huggingface_model( + bert_config=args.bert_config, pretrained_model_name=args.pretrained_model_name + ) - * DataLayer: 一个单独的层,可以用于在你的数据集中进行语义检查,并将它转换到DataLayerNM中。你需要定义 `input_ports` 和 `output_ports` 。 + * 为我们的任务创建分类器。 .. code-block:: python - data_layer = nemo_nlp.BertJointIntentSlotDataLayer(dataset, - batch_size=batch_size, - num_workers=0, - local_rank=local_rank) - - ids, type_ids, input_mask, slot_mask, intents, slots = data_layer() - + from nemo.collections.nlp.nm.trainables import JointIntentSlotClassifier + classifier = JointIntentSlotClassifier( + hidden_size=hidden_size, num_intents=data_desc.num_intents, num_slots=data_desc.num_slots, dropout=args.fc_dropout + ) - * 加载预训练好的模型,并得到相应输入的隐层状态。 + * 为意图检测和槽填充创建损失函数,并使用损失累积模块将二者合并。 .. code-block:: python - hidden_states = pretrained_bert_model(input_ids=ids, - token_type_ids=type_ids, - attention_mask=input_mask) + from nemo.backends.pytorch.common.losses import CrossEntropyLossNM, LossAggregatorNM + intent_loss_fn = CrossEntropyLossNM(logits_ndim=2) + slot_loss_fn = CrossEntropyLossNM(logits_ndim=3) + total_loss_fn = LossAggregatorNM(num_inputs=2, weights=[args.intent_loss_weight, 1.0 - args.intent_loss_weight]) - * 为我们的任务创建一个分类器。 + * 创建训练和测试过程的管道。每个管道拥有自己的数据层 (BertJointIntentSlotDataLayer)。数据层是一个单独用于数据语义检测的层,并可以把数据转换到 DataLayerNM 中,你需要定义 `input_ports` 和 `output_ports`。 .. 
code-block:: python - classifier = nemo_nlp.JointIntentSlotClassifier( - hidden_size=hidden_size, - num_intents=num_intents, - num_slots=num_slots, - dropout=args.fc_dropout) + from nemo.collections.nlp.nm.data_layers import BertJointIntentSlotDataLayer + def create_pipeline(num_samples=-1, batch_size=32, data_prefix='train', is_training=True, num_gpus=1): + logging.info(f"Loading {data_prefix} data...") + data_file = f'{data_desc.data_dir}/{data_prefix}.tsv' + slot_file = f'{data_desc.data_dir}/{data_prefix}_slots.tsv' + shuffle = args.shuffle_data if is_training else False - intent_logits, slot_logits = classifier(hidden_states=hidden_states) - - * 创建损失函数。 - - .. code-block:: python - - loss_fn = nemo_nlp.JointIntentSlotLoss(num_slots=num_slots) - - loss = loss_fn(intent_logits=intent_logits, - slot_logits=slot_logits, - input_mask=input_mask, - intents=intents, - slots=slots) + data_layer = BertJointIntentSlotDataLayer( + input_file=data_file, + slot_file=slot_file, + pad_label=data_desc.pad_label, + tokenizer=tokenizer, + max_seq_length=args.max_seq_length, + num_samples=num_samples, + shuffle=shuffle, + batch_size=batch_size, + ignore_extra_tokens=args.ignore_extra_tokens, + ignore_start_end=args.ignore_start_end, + ) + + input_data = data_layer() + data_size = len(data_layer) + + logging.info(f'The length of data layer is {data_size}') + + if data_size < batch_size: + logging.warning("Batch_size is larger than the dataset size") + logging.warning("Reducing batch_size to dataset size") + batch_size = data_size + + steps_per_epoch = math.ceil(data_size / (batch_size * num_gpus)) + logging.info(f"Steps_per_epoch = {steps_per_epoch}") + + hidden_states = pretrained_bert_model( + input_ids=input_data.input_ids, token_type_ids=input_data.input_type_ids, attention_mask=input_data.input_mask + ) + + intent_logits, slot_logits = classifier(hidden_states=hidden_states) + + intent_loss = intent_loss_fn(logits=intent_logits, labels=input_data.intents) + slot_loss = slot_loss_fn(logits=slot_logits, labels=input_data.slots, loss_mask=input_data.loss_mask) + total_loss = total_loss_fn(loss_1=intent_loss, loss_2=slot_loss) + + if is_training: + tensors_to_evaluate = [total_loss, intent_logits, slot_logits] + else: + tensors_to_evaluate = [ + intent_logits, + slot_logits, + input_data.intents, + input_data.slots, + input_data.subtokens_mask, + ] + + return tensors_to_evaluate, total_loss, steps_per_epoch, data_layer + + + train_tensors, train_loss, train_steps_per_epoch, _ = create_pipeline( + num_samples=args.num_train_samples, + batch_size=args.batch_size, + data_prefix=args.train_file_prefix, + is_training=True, + num_gpus=args.num_gpus, + ) + eval_tensors, _, _, eval_data_layer = create_pipeline( + num_samples=args.num_eval_samples, + batch_size=args.batch_size, + data_prefix=args.eval_file_prefix, + is_training=False, + num_gpus=args.num_gpus, + ) * 创建相应的 callbacks ,来保存 checkpoints,打印训练过程和测试结果。 .. 
code-block:: python - callback_train = nemo.core.SimpleLossLoggerCallback( + from nemo.collections.nlp.callbacks.joint_intent_slot_callback import eval_epochs_done_callback, eval_iter_callback + from nemo.core import CheckpointCallback, SimpleLossLoggerCallback + train_callback = SimpleLossLoggerCallback( tensors=train_tensors, - print_func=lambda x: str(np.round(x[0].item(), 3)), + print_func=lambda x: logging.info(str(round(x[0].item(), 3))), tb_writer=nf.tb_writer, get_tb_values=lambda x: [["loss", x[0]]], - step_freq=steps_per_epoch) + step_freq=steps_per_epoch, + ) - callback_eval = nemo.core.EvaluatorCallback( + eval_callback = nemo.core.EvaluatorCallback( eval_tensors=eval_tensors, - user_iter_callback=lambda x, y: eval_iter_callback( - x, y, data_layer), + user_iter_callback=lambda x, y: eval_iter_callback(x, y), user_epochs_done_callback=lambda x: eval_epochs_done_callback( - x, f'{nf.work_dir}/graphs'), + x, + intents_label_ids=data_desc.intents_label_ids, + slots_label_ids=data_desc.slots_label_ids, + graph_fold=f'{nf.work_dir}/graphs', + normalize_cm=True + ), tb_writer=nf.tb_writer, - eval_step=steps_per_epoch) + eval_step=train_steps_per_epoch, + ) - ckpt_callback = nemo.core.CheckpointCallback( - folder=nf.checkpoint_dir, - epoch_freq=args.save_epoch_freq, - step_freq=args.save_step_freq) + ckpt_callback = CheckpointCallback( + folder=nf.checkpoint_dir, epoch_freq=args.save_epoch_freq, step_freq=args.save_step_freq + ) * 最后,我们定义优化器的参数,并开始训练流程。 .. code-block:: python - lr_policy_fn = get_lr_policy(args.lr_policy, - total_steps=args.num_epochs * steps_per_epoch, - warmup_ratio=args.lr_warmup_proportion) - nf.train(tensors_to_optimize=[train_loss], - callbacks=[callback_train, callback_eval, ckpt_callback], - lr_policy=lr_policy_fn, - optimizer=args.optimizer_kind, - optimization_params={"num_epochs": num_epochs, - "lr": args.lr, - "weight_decay": args.weight_decay}) + from nemo.utils.lr_policies import get_lr_policy + lr_policy_fn = get_lr_policy( + args.lr_policy, total_steps=args.num_epochs * steps_per_epoch, warmup_ratio=args.lr_warmup_proportion + ) + + nf.train( + tensors_to_optimize=[train_loss], + callbacks=[train_callback, eval_callback, ckpt_callback], + lr_policy=lr_policy_fn, + optimizer=args.optimizer_kind, + optimization_params={"num_epochs": args.num_epochs, "lr": args.lr, "weight_decay": args.weight_decay}, + ) + 模型训练 -------- -为了训练一个意图识别和槽填充的混合任务,运行 ``nemo/examples/nlp`` 下的脚本 ``joint_intent_slot_with_bert.py`` : +为了训练一个意图识别和槽填充的混合任务,运行 ``examples/nlp/intent_detection_slot_tagging/joint_intent_slot_with_bert.py`` 下的脚本 ``joint_intent_slot_with_bert.py`` : .. code-block:: python - python -m torch.distributed.launch --nproc_per_node=2 joint_intent_slot_with_bert.py \ - --data_dir - --work_dir \ - --max_seq_length \ - --optimizer_kind - ... + cd examples/nlp/intent_detection_slot_tagging/ + python joint_intent_slot_with_bert.py \ + --data_dir \ + --work_dir \ 测试的话,需要运行: .. code-block:: python - python -m joint_intent_slot_infer.py \ + cd examples/nlp/intent_detection_slot_tagging/ + python joint_intent_slot_infer.py \ --data_dir \ - --work_dir + --checkpoint_dir \ 对一个检索进行测试,需要运行: .. code-block:: python - python -m joint_intent_slot_infer.py \ - --work_dir + cd examples/nlp/intent_detection_slot_tagging/ + python joint_intent_slot_infer.py \ + --checkpoint_dir --query 参考文献 -------- -.. bibliography:: nlp_all.bib +.. 
bibliography:: nlp_all_refs.bib :style: plain :labelprefix: NLP-SLOT :keyprefix: nlp-slot- diff --git a/docs/docs_zh/sources/source/nlp/ner.rst b/docs/docs_zh/sources/source/nlp/ner.rst index ea9af287efc0..e0147e074210 100644 --- a/docs/docs_zh/sources/source/nlp/ner.rst +++ b/docs/docs_zh/sources/source/nlp/ner.rst @@ -1,12 +1,25 @@ 教程 ==== -在教程前,请确认你已经安装了 ``nemo`` 和 ``nemo_nlp`` 。你可以通过这个部分获得更多的信息 :ref:`installation` +在教程前,请确认你已经安装了 ``nemo`` 和 ``nemo_nlp`` 。你可以通过这个部分获得更多的信息 :ref:`installation`。 + +.. tip:: + + BERT预训练和预训练好的模型参见 `BERT pretraining `__. + + +.. _ner_tutorial: 简介 ---- -这个教程将介绍如何在NeMo中,实现命名实体识别(Named Entity Recognition, NER)。我们将通过一个预训练好的 BERT 模型来进行展示,或者你也可以使用一个训练好的模型!你可以通过 BERT 预训练教程获得更多的信息。 +这个教程将介绍如何在 NeMo 中,实现命名实体识别(Named Entity Recognition, NER)。我们将通过一个预训练好的 BERT 模型来进行展示,或者你也可以使用一个训练好的模型!你可以通过 BERT 预训练教程获得更多的信息。 + +.. tip:: + + 我们推荐你试试 Jupyter 这个工具,它会使得 debug 更加容易! + 参见 examples/nlp/token_classification/NERWithBERT.ipynb + 这部分所有代码均基于 :ref:`ner_scripts`。 下载数据集 ---------- @@ -32,72 +45,67 @@ labels.txt 需要满足的格式为: text.txt 每一行包含文本序列,其中词以空格来进行分隔。label.txt 中包含 text.txt 中每个词的标注,标注以空格分隔。文件中的每一行需要符合如下格式: [WORD] [SPACE] [WORD] [SPACE] [WORD] (在 text.txt 中) 和 [LABEL] [SPACE] [LABEL] [SPACE] [LABEL] (在 labels.txt中)。 -.. _script: https://github.com/NVIDIA/NeMo/tree/master/scripts/get_conll_data.py +你可以使用 `this`_ 将CoNLL-2003数据集转换成用于训练的格式。 +.. _this: https://github.com/NVIDIA/NeMo/tree/master/examples/nlp/token_classification/import_from_iob_format.py 训练 ---- -.. tip:: - - 我们建议试试使用Jupyter来运行这部分代码,这会使得调试更加容易! - 详见 examples/nlp/NERWithBERT.ipynb - -首先,我们需要使用所支持的后端,来创建我们的 `Neural Factory` 。你需要确认使用多GPU或者混合精度训练。这个教程中我们使用单GPU训练,不使用混合精度(``optimization_level="O0"``)。如果你想使用混合精度训练,需要设置 ``amp_opt_level`` 这个参数为 ``O1`` 或者 ``O2`` 。 +首先,我们需要使用所支持的后端,来创建我们的 `Neural Factory` 。你需要确认使用多 GPU 或者混合精度训练。这个教程中我们使用单 GPU 训练,不使用混合精度(``optimization_level="O0"``)。如果你想使用混合精度训练,需要设置 ``amp_opt_level`` 这个参数为 ``O1`` 或者 ``O2`` 。 .. code-block:: python + WORK_DIR = "path_to_output_dir" nf = nemo.core.NeuralModuleFactory(backend=nemo.core.Backend.PyTorch, - local_rank=args.local_rank, - optimization_level=args.amp_opt_level, - log_dir=work_dir, - create_tb_writer=True, - files_to_copy=[__file__]) + local_rank=None, + optimization_level="O0", + log_dir=WORK_DIR, + create_tb_writer=True) -接着,我们需要定义我们的分词器 (tokenizer) 和 BERT 模型。你可以有多种方式来实现。注意,NER是大小写敏感的("New York City"比"new york city"更容易被识别出来),所以我们建议使用区分大小写的模型。 +接着,我们需要定义我们的分词器 (tokenizer) 和 BERT 模型。你可以有多种方式来实现。注意,NER 是大小写敏感的("New York City"比"new york city"更容易被识别出来),所以我们建议使用区分大小写的模型。 -如果你正在使用一个标准的 BERT 模型,我们建议你使用下面这条命令。想获取完整的 BERT 列表,可以参考 ``nemo_nlp.huggingface.BERT.list_pretrained_models()`` 。 +如果你正在使用一个标准的 BERT 模型,我们建议你使用下面这条命令。想获取完整的 BERT 列表,可以参考 ``nemo.collections.nlp.nm.trainables.get_bert_models_list()`` 。 .. code-block:: python - tokenizer = NemoBertTokenizer(pretrained_model="bert-base-cased") - bert_model = nemo_nlp.huggingface.BERT( + tokenizer = nemo.collections.nlp.data.NemoBertTokenizer(pretrained_model="bert-base-cased") + bert_model = nemo_nlp.nm.trainables.huggingface.BERT( pretrained_model_name="bert-base-cased") 查看 examples/nlp/token_classification.py 文件来获取如何使用自己预训练好的模型。 -现在,创建训练和验证数据集合: +现在,创建训练和验证数据层: .. 
code-block:: python - - train_data_layer = nemo_nlp.data.BertTokenClassificationDataLayer( - dataset_type="BertCornellNERDataset", + + train_data_layer = nemo_nlp.nm.data_layers.BertTokenClassificationDataLayer( tokenizer=tokenizer, - input_file=os.path.join(DATA_DIR, "train.txt"), + text_file=os.path.join(DATA_DIR, 'text_train.txt'), + label_file=os.path.join(DATA_DIR, 'labels_train.txt'), max_seq_length=MAX_SEQ_LENGTH, batch_size=BATCH_SIZE) - eval_data_layer = nemo_nlp.data.BertTokenClassificationDataLayer( - dataset_type="BertCornellNERDataset", + label_ids = train_data_layer.dataset.label_ids + num_classes = len(label_ids) + + eval_data_layer = nemo_nlp.nm.data_layers.BertTokenClassificationDataLayer( tokenizer=tokenizer, - input_file=os.path.join(DATA_DIR, "dev.txt"), + text_file=os.path.join(DATA_DIR, 'text_dev.txt'), + label_file=os.path.join(DATA_DIR, 'labels_dev.txt'), max_seq_length=MAX_SEQ_LENGTH, - batch_size=BATCH_SIZE) + batch_size=BATCH_SIZE, + label_ids=label_ids) 接着,我们需要在预先训练好的模型上,创建分类器并定义损失函数: .. code-block:: python - label_ids = train_data_layer.dataset.label_ids - num_classes = len(label_ids) - hidden_size = bert_model.hidden_size - ner_classifier = nemo_nlp.TokenClassifier(hidden_size=hidden_size, + ner_classifier = nemo.collections.nlp.nm.trainables.TokenClassifier(hidden_size=hidden_size, num_classes=num_classes, dropout=CLASSIFICATION_DROPOUT) - ner_loss = nemo_nlp.TokenClassificationLoss(d_model=hidden_size, - num_classes=len(label_ids), - dropout=CLASSIFICATION_DROPOUT) + ner_loss = CrossEntropyLossNM(logits_ndim=3) 现在,创建训练和验证集合: @@ -123,28 +131,12 @@ text.txt 每一行包含文本序列,其中词以空格来进行分隔。label eval_logits = ner_classifier(hidden_states=hidden_states) - .. code-block:: python - - train_tensors, train_loss, steps_per_epoch, label_ids, _ = create_pipeline() - eval_tensors, _, _, _, data_layer = create_pipeline(mode='dev') - 现在,我们设置3个回调函数: * `SimpleLossLoggerCallback` 打印出训练过程中的损失函数值 * `EvaluatorCallback` 来验证我们dev集合上F1的值。在这个例子中, `EvaluatorCallback` 也会打印出 `output.txt` 上的预测值,这有利于找出模型哪个部分出了问题。 * `CheckpointCallback` 用于保存和读取checkpoints. -.. tip:: - - Tensorboard_ 是一个非常好用的调试工具。它在本教程中不是一个必须安装的工具,如果你想使用的话,需要先安装 tensorboardX_ 接着在微调过程中使用如下的命令: - - .. code-block:: bash - - tensorboard --logdir bert_ner_tb - -.. _Tensorboard: https://www.tensorflow.org/tensorboard -.. _tensorboardX: https://github.com/lanpa/tensorboardX - .. code-block:: python callback_train = nemo.core.SimpleLossLoggerCallback( @@ -163,6 +155,13 @@ text.txt 每一行包含文本序列,其中词以空格来进行分隔。label user_epochs_done_callback=lambda x: eval_epochs_done_callback(x, label_ids), eval_step=steps_per_epoch) + # 用于保存 checkpoints + # 将会保存在 WORK_DIR 目录下 + ckpt_callback = nemo.core.CheckpointCallback( + folder=nf.checkpoint_dir, + epoch_freq=1) + + 最后,我们需要定义学习率规则和优化器,并且开始训练: .. code-block:: python @@ -171,12 +170,26 @@ text.txt 每一行包含文本序列,其中词以空格来进行分隔。label warmup_ratio=LR_WARMUP_PROPORTION) nf.train(tensors_to_optimize=[train_loss], - callbacks=[train_callback, eval_callback], + callbacks=[train_callback, eval_callback, ckpt_callback], lr_policy=lr_policy, optimizer=OPTIMIZER, optimization_params={"num_epochs": NUM_EPOCHS, "lr": LEARNING_RATE}) + +.. tip:: + + Tensorboard_ 是一个非常好用的调试工具。它在本教程中不是一个必须安装的工具,如果你想使用的话,需要先安装 tensorboardX_ 接着在微调过程中使用如下的命令: + + .. code-block:: bash + + tensorboard --logdir output_ner/tensorboard + +.. _Tensorboard: https://www.tensorflow.org/tensorboard +.. _tensorboardX: https://github.com/lanpa/tensorboardX + +.. 
_ner_scripts: + 使用脚本训练新的 BERT 模型 -------------------------- @@ -184,21 +197,21 @@ text.txt 每一行包含文本序列,其中词以空格来进行分隔。label .. code-block:: bash - python token_classification.py --data_dir /data/ner/ --work_dir output_ner + python examples/nlp/token_classification/token_classification.py --data_dir path_to_data --work_dir path_to_output_dir 测试: .. code-block:: bash - python token_classification_infer.py --labels_dict /data/ner/label_ids.csv - --work_dir output_ner/checkpoints/ + python examples/nlp/token_classification/token_classification_infer.py --labels_dict path_to_data/label_ids.csv + --checkpoint_dir path_to_output_dir/checkpoints/ 注意,这里会在训练过程中,到 data_dir 目录下生成 label_ids.csv 文件。 使用其它的 BERT 模型 -------------------- -除了可以使用谷歌提供的预训练 BERT 模型和你自己训练的模型外,在NeMo中,也可以使用来自第三方的BERT模型,只要这个模型的参数可以加载到PyTorch中即可。例如,如果你想使用 SciBERT_ 来微调: +除了可以使用谷歌提供的预训练 BERT 模型和你自己训练的模型外,在 NeMo 中,也可以使用来自第三方的BERT模型,只要这个模型的参数可以加载到 PyTorch 中即可。例如,如果你想使用 SciBERT_ 来微调: .. _SciBERT: https://github.com/allenai/scibert @@ -219,7 +232,3 @@ text.txt 每一行包含文本序列,其中词以空格来进行分隔。label bert_model = nemo_nlp.huggingface.BERT( pretrained_model_name="scibert_scivocab_cased" ) - -如果你想使用 TensorFlow 训练好的模型,例如 BioBERT ,你需要首先使用 Hugging Face 提供的 `model conversion script`_ 进行模型转换,再在 NeMo 中使用这个模型。 - -.. _model conversion script: https://github.com/huggingface/pytorch-transformers/blob/master/pytorch_transformers/convert_tf_checkpoint_to_pytorch.py diff --git a/docs/docs_zh/sources/source/nlp/neural_machine_translation.rst b/docs/docs_zh/sources/source/nlp/neural_machine_translation.rst index acb708d639ba..11812581243e 100644 --- a/docs/docs_zh/sources/source/nlp/neural_machine_translation.rst +++ b/docs/docs_zh/sources/source/nlp/neural_machine_translation.rst @@ -1,7 +1,7 @@ 教程 ==== -在本教程中我们将要实现基于 `Transformer 编码器-解码器结构 `_ :cite:`nlp-nmt-vaswani2017attention` 的神经机器翻译系统。本教程中使用到的所有代码都基于 ``examples/nlp/nmt_tutorial.py`` 。 +在本教程中我们将要实现基于 `Transformer 编码器-解码器结构 `_ :cite:`nlp-nmt-vaswani2017attention` 的神经机器翻译系统。本教程中使用到的所有代码都基于 ``examples/nlp/neural_machine_translation/machine_translation_tutorial.py`` 。 预备知识 -------- @@ -15,7 +15,7 @@ 我们使用 newstest2013 数据集作为验证集,并使用 newstest2014 数据集作为测试集。所有的数据集以及分词器(tokenizer)模型都可以从 `此处 `_ 下载。 在下面的步骤中,我们假设所有的数据都放置在 **** 目录中。 -**资源.** 本教程中使用的训练脚本能够训练 Transformer-big 结构的 BERT 模型并在 newstest2014 数据集上达到 **29.2** BLEU / **28.5** SacreBLEU 的分数表现,在配备了多块 16GB Volta 架构图形处理器 的 NVIDIA's DGX-1 上仅需约 15 小时即可完成全部训练过程。同样的训练结果也能够使用更少的资源并通过增加梯度更新的次数来实现 :cite:`nlp-nmt-ott2018scaling` 。 +**资源.** 本教程中使用的训练脚本 ``examples/nlp/neural_machine_translation/machine_translation_tutorial.py`` 能够训练 Transformer-big 结构的 BERT 模型并在 newstest2014 数据集上达到 **29.2** BLEU / **28.5** SacreBLEU 的分数表现,在配备了多块 16GB Volta 架构图形处理器 的 NVIDIA's DGX-1 上仅需约 15 小时即可完成全部训练过程。同样的训练结果也能够使用更少的资源并通过增加梯度更新的次数来实现 :cite:`nlp-nmt-ott2018scaling` 。 .. tip:: 在不指定任何训练参数的前提下运行训练脚本将会在一个很小的数据集(newstest2013)上开始训练,其中训练集包含 3000 个句子对,验证集包含 100 个句子对。这样训练能够更方便地对代码进行调试:如果一切设置正确,验证集的 BLEU 将很快就能 >99,验证集的损失(loss)也能很快就会 < 1.5。 @@ -154,7 +154,7 @@ 模型训练 -------- -要想训练一个 Transformer-big 结构的神经机器翻译模型,请运行位于 ``nemo/examples/nlp`` 的 ``nmt_tutorial.py`` : +要想训练一个 Transformer-big 结构的神经机器翻译模型,请运行位于 ``examples/nlp/neural_machine_translation/machine_translation_tutorial.py`` 的 ``nmt_tutorial.py`` : .. code-block:: python @@ -187,7 +187,7 @@ 引用 ---- -.. bibliography:: nlp_all.bib +.. 
bibliography:: nlp_all_refs.bib :style: plain :labelprefix: NLP-NMT :keyprefix: nlp-nmt- diff --git a/docs/sources/source/nlp/nlp_all.bib b/docs/docs_zh/sources/source/nlp/nlp_all_refs.bib similarity index 79% rename from docs/sources/source/nlp/nlp_all.bib rename to docs/docs_zh/sources/source/nlp/nlp_all_refs.bib index d6eb32017e20..950fc2e6e7f7 100644 --- a/docs/sources/source/nlp/nlp_all.bib +++ b/docs/docs_zh/sources/source/nlp/nlp_all_refs.bib @@ -129,4 +129,35 @@ @article{chen2019bert author={Chen, Qian and Zhuo, Zhu and Wang, Wen}, journal={arXiv preprint arXiv:1902.10909}, year={2019} -} \ No newline at end of file +} + + +@article{budzianowski2018multiwoz, + title={MultiWOZ-a large-scale multi-domain wizard-of-oz dataset for task-oriented dialogue modelling}, + author={Budzianowski, Pawe{\l} and Wen, Tsung-Hsien and Tseng, Bo-Hsiang and Casanueva, Inigo and Ultes, Stefan and Ramadan, Osman and Ga{\v{s}}i{\'c}, Milica}, + journal={arXiv preprint arXiv:1810.00278}, + year={2018} +} + +@article{eric2019multiwoz, + title={MultiWOZ 2.1: Multi-domain dialogue state corrections and state tracking baselines}, + author={Eric, Mihail and Goel, Rahul and Paul, Shachi and Sethi, Abhishek and Agarwal, Sanchit and Gao, Shuyag and Hakkani-Tur, Dilek}, + journal={arXiv preprint arXiv:1907.01669}, + year={2019} +} + + +@article{wu2019transferable, + title={Transferable multi-domain state generator for task-oriented dialogue systems}, + author={Wu, Chien-Sheng and Madotto, Andrea and Hosseini-Asl, Ehsan and Xiong, Caiming and Socher, Richard and Fung, Pascale}, + journal={arXiv preprint arXiv:1905.08743}, + year={2019} +} + + +@article{henderson2015machine, + title={Machine learning for dialog state tracking: A review}, + author={Henderson, Matthew}, + journal={research.google}, + year={2015} +} diff --git a/docs/docs_zh/sources/source/nlp/punctuation.rst b/docs/docs_zh/sources/source/nlp/punctuation.rst new file mode 100644 index 000000000000..b47abed77533 --- /dev/null +++ b/docs/docs_zh/sources/source/nlp/punctuation.rst @@ -0,0 +1,367 @@ +教程 +======== + + +ASR系统通常产生的文本是没有标点符号和不区分词的大小写。这个教程讲述了如果在 NeMo 中实现模型预测标点和为每个词预测是否要首字母大写,从而使得 ASR 的输出更加可读,并且提升下游的任务像是命名实体识别和机器翻译。我们会展示如何用一个预训练的 BERT 模型来训练这个网络。 + +.. tip:: + + 我们建议你在 Jupyter notebook 中尝试这个例子,它位于 examples/nlp/token_classification/PunctuationWithBERT.ipynb. + + 这个教程中的所有代码都基于 :ref:`punct_scripts`. + 在 NeMo 中预训练 BERT 以及预训练好的模型 checkpoints 请参考 `BERT 预训练 `__. + + +任务描述 +---------------- + +对训练集中每个字我们要预测: + +1. 跟着这个词的标点符号和 +2. 这个词是否要首字母大写 + +在这个模型中, 我们在预训练的 BERT 模型上联合训练 2 个 token 层面的分类器: 一个预测标点符号,另一个预测大小写。 + +数据集 +------- + +模型可以运行在任何数据集上,只要它遵守下面的格式。这个教程中我们会用数据集 `Tatoeba collection of sentences`_. `This`_ 脚本下载和预处理数据集。 + +.. _Tatoeba collection of sentences: https://tatoeba.org/eng +.. _This: https://github.com/NVIDIA/NeMo/blob/master/examples/nlp/token_classification/get_tatoeba_data.py + + +训练集和验证集分成了两个文件: text.txt 以及 labels.txt。text.txt 文件的每行包含了文本序列,词之间用空格分割: +[WORD] [SPACE] [WORD] [SPACE] [WORD], 例如: + + :: + + when is the next flight to new york + the next flight is ... + ... + +文件 labels.txt 包含了 text.txt 中每个词的标签(label), 标注用空格分割. +在 labels.txt 文件中的每个标签包含两个符号: + +* 标签的第一个符号表示这个词后面应该跟什么标点符号 (其中 ``O`` 表示不需要标点符号); +* 第二个符号决定了这个词是否要大写(其中 ``U`` 说明这个词需要大写, ``O`` 表示不需要大写) + +我们在这个任务中只考虑逗号,句号和问号。剩下的标点符号都去除了。 +labels.txt 文件的每行都应该是下面这个格式的: +[LABEL] [SPACE] [LABEL] [SPACE] [LABEL] (labels.txt). 比如,在上面的 text.txt 文件中的标签应该是: + +:: + + OU OO OO OO OO OO OU ?U + OU OO OO OO ... + ... 
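+
+作为示意,下面用一小段 Python 代码演示如何从一个带标点、带大小写的原始句子得到这种标注
+(``make_labels`` 只是为了说明标注格式而假设的一个辅助函数,并不是官方预处理脚本
+``get_tatoeba_data.py`` 的实现):
+
+.. code-block:: python
+
+    # 示意代码:只考虑逗号、句号和问号,其余标点假定已在预处理中去除
+    def make_labels(sentence):
+        labels = []
+        for word in sentence.split():
+            punct = word[-1] if word[-1] in ',.?' else 'O'   # 第一个符号:词后面的标点
+            capit = 'U' if word[0].isupper() else 'O'        # 第二个符号:首字母是否大写
+            labels.append(punct + capit)
+        return ' '.join(labels)
+
+    # make_labels("When is the next flight to New York?")
+    # 返回 "OU OO OO OO OO OO OU ?U",与上面 labels.txt 示例的第一行一致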
+ +这个任务所有可能的标签是: ``OO``, ``,O``, ``.O``, ``?O``, ``OU``, ``,U``, ``.U``, ``?U``. + +代码概览 +------------- + +首先, 设置一些必须的参数: + + .. code-block:: python + + DATA_DIR = "PATH_TO_WHERE_THE_DATA_IS" + WORK_DIR = "PATH_TO_WHERE_TO_STORE_CHECKPOINTS_AND_LOGS" + PRETRAINED_BERT_MODEL = "bert-base-uncased" + + # 模型参数 + BATCHES_PER_STEP = 1 + BATCH_SIZE = 128 + CLASSIFICATION_DROPOUT = 0.1 + MAX_SEQ_LENGTH = 64 + NUM_EPOCHS = 10 + LEARNING_RATE = 0.00002 + LR_WARMUP_PROPORTION = 0.1 + OPTIMIZER = "adam" + STEP_FREQ = 200 # 决定了 loss 多久打印一次,checkpoint 多久保存一次 + PUNCT_NUM_FC_LAYERS = 3 + NUM_SAMPLES = 100000 + +下载,预处理一部分的数据集 (Tatoeba collection of sentences), 运行: + +.. code-block:: bash + + python get_tatoeba_data.py --data_dir DATA_DIR --num_sample NUM_SAMPLES + +接着,我们需要用支持的后端创建 neural factory。 这个教程假设你在单卡 GPU 上训练,混精度 (``optimization_level="O1"``)。如果你不想用混精度训练,设置 ``optimization_level`` 为 ``O0``。 + + .. code-block:: python + + nf = nemo.core.NeuralModuleFactory(backend=nemo.core.Backend.PyTorch, + local_rank=None, + optimization_level="O1", + log_dir=WORK_DIR, + placement=nemo.core.DeviceType.GPU) + +然后,定义我们的分词器和 BERT 模型。如果你用标准的 BERT,你可以这么做。想要看所有 BERT O型的名字,可以查看 ``nemo.collections.nlp.nm.trainables.get_bert_models_list()``. \ +`` + + .. code-block:: python + + tokenizer = nemo.collections.nlp.data.NemoBertTokenizer(pretrained_model=PRETRAINED_BERT_MODEL) + bert_model = nemo_nlp.nm.trainables.huggingface.BERT( + pretrained_model_name=PRETRAINED_BERT_MODEL) + +现在, 创建验证和训练的数据层: + + .. code-block:: python + + train_data_layer = nemo_nlp.nm.data_layers.PunctuationCapitalizationDataLayer( + tokenizer=tokenizer, + text_file=os.path.join(DATA_DIR, 'text_train.txt'), + label_file=os.path.join(DATA_DIR, 'labels_train.txt'), + max_seq_length=MAX_SEQ_LENGTH, + batch_size=BATCH_SIZE) + + punct_label_ids = train_data_layer.dataset.punct_label_ids + capit_label_ids = train_data_layer.dataset.capit_label_ids + + hidden_size = bert_model.hidden_size + + # 注意你需要指定 punct_label_ids 和 capit_label_ids - 它们是在创建train_data_layer + # 映射标签到标签id(label_ids)时候生成的 + # 目的是为了确保映射是正确的, + # 防止一些训练集的标签在验证集上丢失 + eval_data_layer = nemo_nlp.BertPunctuationCapitalizationDataLayer( + tokenizer=tokenizer, + text_file=os.path.join(DATA_DIR, 'text_dev.txt'), + label_file=os.path.join(DATA_DIR, 'labels_dev.txt'), + max_seq_length=MAX_SEQ_LENGTH, + batch_size=BATCH_SIZE, + punct_label_ids=punct_label_ids, + capit_label_ids=capit_label_ids) + + +现在,在预训练 BERT 模型上创建标签和大写分类器并且定义这个任务的损失函数: + + .. code-block:: python + + punct_classifier = TokenClassifier( + hidden_size=hidden_size, + num_classes=len(punct_label_ids), + dropout=CLASSIFICATION_DROPOUT, + num_layers=PUNCT_NUM_FC_LAYERS, + name='Punctuation') + + capit_classifier = TokenClassifier(hidden_size=hidden_size, + num_classes=len(capit_label_ids), + dropout=CLASSIFICATION_DROPOUT, + name='Capitalization') + + + # 如果你不想在标点符号任务上用加权损失函数,设置 class_weights=None + punct_label_freqs = train_data_layer.dataset.punct_label_frequencies + class_weights = nemo.collections.nlp.data.datasets.datasets_utils.calc_class_weights(punct_label_freqs) + + # 定义损失函数 + punct_loss = CrossEntropyLossNM(logits_ndim=3, weight=class_weights) + capit_loss = CrossEntropyLossNM(logits_ndim=3) + task_loss = LossAggregatorNM(num_inputs=2) + + +下面,通过预训练的 BERT 模型,我们传递数据层的输出给到分类器: + + .. 
code-block:: python + + input_ids, input_type_ids, input_mask, loss_mask, subtokens_mask, punct_labels, capit_labels = train_data_layer() + + hidden_states = bert_model(input_ids=input_ids, + token_type_ids=input_type_ids, + attention_mask=input_mask) + + punct_logits = punct_classifier(hidden_states=hidden_states) + capit_logits = capit_classifier(hidden_states=hidden_states) + + punct_loss = punct_loss(logits=punct_logits, + labels=punct_labels, + loss_mask=loss_mask) + capit_loss = capit_loss(logits=capit_logits, + labels=capit_labels, + loss_mask=loss_mask) + task_loss = task_loss(loss_1=punct_loss, + loss_2=capit_loss) + + eval_input_ids, eval_input_type_ids, eval_input_mask, _, eval_subtokens_mask, eval_punct_labels, eval_capit_labels\ + = eval_data_layer() + + hidden_states = bert_model(input_ids=eval_input_ids, + token_type_ids=eval_input_type_ids, + attention_mask=eval_input_mask) + + eval_punct_logits = punct_classifier(hidden_states=hidden_states) + eval_capit_logits = capit_classifier(hidden_states=hidden_states) + + + +现在,我们设置我们的回调函数。我们用3个回调函数: + +* `SimpleLossLoggerCallback` 打印训练过程中的损失函数值 +* `EvaluatorCallback` 计算验证集上的数据指标 +* `CheckpointCallback` 用来保存和还原 checkpoints + + .. code-block:: python + + callback_train = nemo.core.SimpleLossLoggerCallback( + tensors=[task_loss, punct_loss, capit_loss, punct_logits, capit_logits], + print_func=lambda x: logging.info("Loss: {:.3f}".format(x[0].item())), + step_freq=STEP_FREQ) + + train_data_size = len(train_data_layer) + + # 如果你用多 GPUs,这行应该是 + # train_data_size / (batch_size * batches_per_step * num_gpus) + steps_per_epoch = int(train_data_size / (BATCHES_PER_STEP * BATCH_SIZE)) + + # 回调评估模型 + callback_eval = nemo.core.EvaluatorCallback( + eval_tensors=[eval_punct_logits, + eval_capit_logits, + eval_punct_labels, + eval_capit_labels, + eval_subtokens_mask], + user_iter_callback=lambda x, y: eval_iter_callback(x, y), + user_epochs_done_callback=lambda x: eval_epochs_done_callback(x, + punct_label_ids, + capit_label_ids), + eval_step=steps_per_epoch) + + # 回调保存 checkpoints + ckpt_callback = nemo.core.CheckpointCallback(folder=nf.checkpoint_dir, + step_freq=STEP_FREQ) + +最后,定义学习率策略和我们的优化器,开始训练: + + .. code-block:: python + + lr_policy = WarmupAnnealing(NUM_EPOCHS * steps_per_epoch, + warmup_ratio=LR_WARMUP_PROPORTION) + + nf.train(tensors_to_optimize=[task_loss], + callbacks=[callback_train, callback_eval, ckpt_callback], + lr_policy=lr_policy, + batches_per_step=BATCHES_PER_STEP, + optimizer=OPTIMIZER, + optimization_params={"num_epochs": NUM_EPOCHS, + "lr": LEARNING_RATE}) + +推理 +--------- + +为了看看模型的推理预测,我们在一些样本上运行推理。我们需要定义一个数据层,就像我们为训练和验证评估那样创建的数据层。 + +.. code-block:: python + + queries = ['can i help you', + 'yes please', + 'we bought four shirts from the nvidia gear store in santa clara', + 'we bought four shirts one mug and ten thousand titan rtx graphics cards', + 'the more you buy the more you save'] + infer_data_layer = nemo_nlp.nm.data_layers.BertTokenClassificationInferDataLayer( + queries=queries, + tokenizer=tokenizer, + max_seq_length=MAX_SEQ_LENGTH, + batch_size=1) + + +运行推理,基于训练结果加上标点符号和单词大写: + +.. 
code-block:: python + + input_ids, input_type_ids, input_mask, _, subtokens_mask = infer_data_layer() + + hidden_states = bert_model(input_ids=input_ids, + token_type_ids=input_type_ids, + attention_mask=input_mask) + punct_logits = punct_classifier(hidden_states=hidden_states) + capit_logits = capit_classifier(hidden_states=hidden_states) + + evaluated_tensors = nf.infer(tensors=[punct_logits, capit_logits, subtokens_mask], + checkpoint_dir=WORK_DIR + '/checkpoints') + + + + # 帮助函数 + def concatenate(lists): + return np.concatenate([t.cpu() for t in lists]) + + punct_ids_to_labels = {punct_label_ids[k]: k for k in punct_label_ids} + capit_ids_to_labels = {capit_label_ids[k]: k for k in capit_label_ids} + + punct_logits, capit_logits, subtokens_mask = [concatenate(tensors) for tensors in evaluated_tensors] + punct_preds = np.argmax(punct_logits, axis=2) + capit_preds = np.argmax(capit_logits, axis=2) + + for i, query in enumerate(queries): + logging.info(f'Query: {query}') + + punct_pred = punct_preds[i][subtokens_mask[i] > 0.5] + capit_pred = capit_preds[i][subtokens_mask[i] > 0.5] + words = query.strip().split() + if len(punct_pred) != len(words) or len(capit_pred) != len(words): + raise ValueError('Pred and words must be of the same length') + + output = '' + for j, w in enumerate(words): + punct_label = punct_ids_to_labels[punct_pred[j]] + capit_label = capit_ids_to_labels[capit_pred[j]] + + if capit_label != 'O': + w = w.capitalize() + output += w + if punct_label != 'O': + output += punct_label + output += ' ' + logging.info(f'Combined: {output.strip()}\n') + +预测结果: + + :: + + Query: can i help you + Combined: Can I help you? + + Query: yes please + Combined: Yes, please. + + Query: we bought four shirts from the nvidia gear store in santa clara + Combined: We bought four shirts from the Nvidia gear store in Santa Clara. + + Query: we bought four shirts one mug and ten thousand titan rtx graphics cards + Combined: We bought four shirts, one mug, and ten thousand Titan Rtx graphics cards. + + Query: the more you buy the more you save + Combined: The more you buy, the more you save. + +.. _punct_scripts: + +训练和推理脚本 +------------------------------ + +运行提供的训练脚本: + +.. code-block:: bash + + python examples/nlp/token_classification/punctuation_capitalization.py --data_dir path_to_data --pretrained_model_name=bert-base-uncased --work_dir path_to_output_dir + +运行推理: + +.. code-block:: bash + + python examples/nlp/token_classification/punctuation_capitalization_infer.py --punct_labels_dict path_to_data/punct_label_ids.csv --capit_labels_dict path_to_data/capit_label_ids.csv --checkpoint_dir path_to_output_dir/checkpoints/ + +注意, punct_label_ids.csv 和 capit_label_ids.csv 文件在训练的时候会生成并且存在 data_dir 文件目录下。 + +多 GPU 训练 +------------------ + +在多张 GPU 上训练,运行 + +.. code-block:: bash + + export NUM_GPUS=2 + python -m torch.distributed.launch --nproc_per_node=$NUM_GPUS examples/nlp/token_classification/punctuation_capitalization.py --num_gpus $NUM_GPUS --data_dir path_to_data diff --git a/docs/docs_zh/sources/source/nlp/question_answering.rst b/docs/docs_zh/sources/source/nlp/question_answering.rst new file mode 100644 index 000000000000..978eaa139b5e --- /dev/null +++ b/docs/docs_zh/sources/source/nlp/question_answering.rst @@ -0,0 +1,278 @@ +教程 +======== + +在这个教程中,我们会在 SQuAD 数据集上训练一个问答系统。模型结构用的是预训练的类 BERT 的模型 +`BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding `_ :cite:`nlp-qa-devlin2018bert`. 
+这个教程中所有的代码都基于 ``examples/nlp/question_answering/question_answering_squad.py``。 + + +目前支持三个主要的预训练模型,在这些模型上,用 SQuAD 数据集进行问答任务的微调(fine-tuning): +BERT, ALBERT and RoBERTa. 这些预训练模型的 checkpoints 来自 `transformers `__ . 除了这些,用户也可以在自定义的 +BERT checkpoint 上做微调(fine-tuning),通过制定 `--bert_checkpoint` 参数。 +预训练的主要模型的类型可以用 `--model_type` 指定,具体的某个模型用参数 `--pretrained_model_name` 指定。 +已有的预训练模型参见这个列表: +`这里 `__. + +.. tip:: + + 如果要在 NeMo 中预训练 BERT 以及预训练好的模型 checkpoints 请参阅 `BERT pretraining `__. + + + +准备工作 +------------- + +**模型细节** + +这个模型是训练在 token 层面的分类器上,预测在上下文中答案的开始和结束位置。 +损失函数值是答案开始 `S_loss` 和答案结束 `E_loss` 的交叉熵损失函数值: + + `S_loss` + `E_loss` + +推理(inference)的时候,使得损失值最小的最长答案范围被用来作为预测的答案。 + +**数据集** + +模型可以处理下面这种格式的任意数据集: + + * 训练文件: 一个 `json` 文件,结构如下 + + {"data":[{"title": "string", "paragraphs": [{"context": "string", "qas": [{"question": "string", "is_impossible": "bool", "id": "number", "answers": [{"answer_start": "number", "text": "string", }]}]}]}]} + "answers" 可以为空,如果模型要学习的问题的是无解的(impossible),如果是这样的,需要传入参数 `--version_2_with_negative` + + * 验证集文件: 一个 `json` 文件和训练文件结构一样, + 除了它可以对同一个问题提供多个 "answer" 答案。 + + + * 测试文件: 一个 `json` 文件和训练文件结构一样, + 但它并不要求有 "answers" 这个键值。 + +目前我们为其提供预处理脚本的数据集是 SQuAD v1.1 和 v2.0 +可以从这里下载: +数据集 `https://rajpurkar.github.io/SQuAD-explorer/ `_. +预处理脚本位于 ``examples/nlp/question_answering/get_squad.py``。 + + +代码结构 +-------------- + +首先,初始化神经模块工厂( Neural Module Factory),它定义了 1) 后端 (PyTorch), 2) 混精度优化等级, +3) GPU的本地秩(local rank), 以及 4) 实验管理器(experiment manager)会创建带时间戳的文件夹来存储 checkpoints,相关输出,日志文件,以及 TensorBoard 的图。 + + .. code-block:: python + + import nemo + import nemo.collections.nlp as nemo_nlp + nf = nemo.core.NeuralModuleFactory(backend=nemo.core.Backend.PyTorch, + local_rank=args.local_rank, + optimization_level=args.amp_opt_level, + log_dir=work_dir, + create_tb_writer=True, + files_to_copy=[__file__], + add_time_to_log_dir=True) + +接着,我们定义参与到我们的问答分类问题管道中的神经模块: + + * 处理数据: `BertQuestionAnsweringDataLayer` 对裸数据处理成 `SquadDataset` 接受的数据格式。 + + 训练和验证评估(evaluation)都需要它们自己的 `BertQuestionAnsweringDataLayer` 数据层。 + 数据层(DataLayer)是一个用来为你的数据集做额外的语义检查并把它转换成数据层神经模块(DataLayerNM)的层。 + + .. code-block:: python + + data_layer = nemo_nlp.nm.data_layers.BertQuestionAnsweringDataLayer( + mode="train", + data_file=args.train_file, + tokenizer=tokenizer, + batch_size=args.batch_size, + version_2_with_negative=args.version_2_with_negative, + max_query_length=args.max_query_length, + max_seq_length=args.max_seq_length, + doc_stride=args.doc_stride, + use_cache=args.use_data_cache) + + + data_layer_eval = nemo_nlp.nm.data_layers.BertQuestionAnsweringDataLayer( + mode='eval', + data_file=args.eval_file, + tokenizer=tokenizer, + batch_size=args.batch_size, + version_2_with_negative=args.version_2_with_negative, + max_query_length=args.max_query_length, + max_seq_length=args.max_seq_length, + doc_stride=args.doc_stride, + use_cache=args.use_data_cache) + + * 加载预训练模型,得到相应输入的隐状态(hidden states)。 + + .. code-block:: python + + args.pretrained_model_name = "bert-base-uncased" + model = nemo_nlp.nm.trainables.huggingface.BERT(args.pretrained_model_name) + # 如果模型是 RoBERTa + args.pretrained_model_name = "roberta-base" + model = nemo_nlp.nm.trainables.huggingface.Roberta(args.pretrained_model_name) + # 或者是 Albert + args.pretrained_model_name = "albert-base-v1" + model = nemo_nlp.nm.trainables.huggingface.Albert(args.pretrained_model_name) + + * 定义分词器,这里用 `NemoBertTokenizer` 把文本转换成 BERT 的 tokens。这会按照原始的 BERT 模型那样切分文本。 + + .. 
code-block:: python + + hidden_size = model.hidden_size + tokenizer = nemo_nlp.data.NemoBertTokenizer(pretrained_model=args.pretrained_model_name) + + + * 为我们的任务创建分类器的头部(head)。 + + .. code-block:: python + + qa_head = nemo_nlp.nm.trainables.TokenClassifier( + hidden_size=hidden_size, + num_classes=2, + num_layers=1, + log_softmax=False) + + * 创建损失函数 + + .. code-block:: python + + loss_fn = nemo_nlp.nm.losses.SpanningLoss() + + * 为训练和验证评估过程创建管道 + + .. code-block:: python + + # training graph + input_data = data_layer() + hidden_states = model(input_ids=input_data.input_ids, + token_type_ids=input_data.input_type_ids, + attention_mask=input_data.input_mask) + + qa_logits = qa_head(hidden_states=hidden_states) + loss_outputs = squad_loss( + logits=qa_logits, + start_positions=input_data.start_positions, + end_positions=input_data.end_positions) + train_tensors = [loss_outputs.loss] + + # 评估图 + input_data_eval = data_layer_eval() + + hidden_states_eval = model( + input_ids=input_data_eval.input_ids, + token_type_ids=input_data_eval.input_type_ids, + attention_mask=input_data_eval.input_mask) + + qa_logits_eval = qa_head(hidden_states=hidden_states_eval) + loss_outputs_eval = squad_loss( + logits=qa_logits_eval, + start_positions=input_data_eval.start_positions, + end_positions=input_data_eval.end_positions) + eval_tensors = [input_data_eval.unique_ids, loss_outputs_eval.start_logits, loss_outputs_eval.end_logits] + + + + * 创建回调,保存 checkpoints,打印训练过程和验证评估结果。 + + .. code-block:: python + + train_callback = nemo.core.SimpleLossLoggerCallback( + tensors=train_tensors, + print_func=lambda x: logging.info("Loss: {:.3f}".format(x[0].item())), + get_tb_values=lambda x: [["loss", x[0]]], + step_freq=args.step_freq, + tb_writer=neural_factory.tb_writer) + + + eval_callback = nemo.core.EvaluatorCallback( + eval_tensors=eval_tensors, + user_iter_callback=lambda x, y: eval_iter_callback(x, y), + user_epochs_done_callback=lambda x: + eval_epochs_done_callback( + x, eval_data_layer=data_layer_eval, + do_lower_case=args.do_lower_case, + n_best_size=args.n_best_size, + max_answer_length=args.max_answer_length, + version_2_with_negative=args.version_2_with_negative, + null_score_diff_threshold=args.null_score_diff_threshold), + tb_writer=neural_factory.tb_writer, + eval_step=args.eval_step_freq) + + ckpt_callback = nemo.core.CheckpointCallback( + folder=nf.checkpoint_dir, + epoch_freq=args.save_epoch_freq, + step_freq=args.save_step_freq) + + * 最后,定义优化器参数,运行整个管道 + + .. code-block:: python + + lr_policy_fn = get_lr_policy(args.lr_policy, + total_steps=args.num_epochs * steps_per_epoch, + warmup_ratio=args.lr_warmup_proportion) + + nf.train(tensors_to_optimize=train_tensors, + callbacks=[train_callback, eval_callback, ckpt_callback], + lr_policy=lr_policy_fn, + optimizer=args.optimizer_kind, + optimization_params={"num_epochs": args.num_epochs, + "lr": args.lr, + "weight_decay": args.weight_decay}) + +模型训练 +-------------- + +跑在单张 GPU,运行: + + .. code-block:: python + + python question_answering_squad.py \ + ... + +用多卡跑 SQuAD 问答任务,运行 ``question_answering_squad.py`` ,它位于 ``examples/nlp/question_answering``: + + .. code-block:: python + + python -m torch.distributed.launch --nproc_per_node=8 question_answering_squad.py + --train_file <*.json 格式的训练文件> + --eval_file <*.json 格式的验证评估文件> + --num_gpus 8 + --work_dir <你想在哪里记录你的实验> + --amp_opt_level + --pretrained_model_name <模型类型> + --bert_checkpoint <预训练的 bert checkpoint> + --mode "train_eval" + ... + +运行评估: + + .. 
code-block:: python + + python question_answering_squad.py + --eval_file <*.json 格式的验证评估文件> + --checkpoint_dir <已经训练好的 SQuAD 模型的 checkpoint 的文件夹> + --mode "eval" + --output_prediction_file <预测结果的输出文件> + ... + +运行推理: + + .. code-block:: python + + python question_answering_squad.py + --test_file <*.json 格式的验证评估文件> + --checkpoint_dir <已经训练好的 SQuAD 模型的 checkpoint 的文件夹> + --mode "test" + --output_prediction_file <预测结果的输出文件> + ... + + +参考 +---------- + +.. bibliography:: nlp_all_refs.bib + :style: plain + :labelprefix: NLP-QA + :keyprefix: nlp-qa- \ No newline at end of file diff --git a/docs/docs_zh/sources/source/nlp/transformer_language_model.rst b/docs/docs_zh/sources/source/nlp/transformer_language_model.rst index 8ed88ec9f3b4..63c2e82d87c0 100644 --- a/docs/docs_zh/sources/source/nlp/transformer_language_model.rst +++ b/docs/docs_zh/sources/source/nlp/transformer_language_model.rst @@ -1,19 +1,21 @@ Transformer语言模型 =================== -在这个教程中,我们会用Transformer :cite:`nlp-lm-vaswani2017attention` 的结构构建和训练一个语言模型。确保在开始这个教程之前你已经安装了 ``nemo`` 和 ``nemo_nlp`` ,详见 :ref:`installation` 。 +在这个教程中,我们会用Transformer :cite:`nlp-lm-vaswani2017attention` 的结构构建和训练一个语言模型。 +确保在开始这个教程之前你已经安装了 ``nemo`` 和 ``nemo_nlp`` ,详见 :ref:`installation` 。 简介 ---- 一个好的语言模型对于下游任务有很广泛的应用。用于下游任务的语言模型例子包括 GPT-2 :cite:`nlp-lm-radford2019language` 。 + 下载语料 -------- 在这个实验中我们会使用非常小的WikiText-2数据集 :cite:`nlp-lm-merity2016pointer` 。 -下载数据集,运行脚本 ``examples/nlp/scripts/get_wt2.sh``. 下载和解压数据集后,文件夹会包括三个文件: +下载数据集,运行脚本 ``examples/nlp/language_modeling/get_wkt2.sh ``. 下载和解压数据集后,文件夹会包括三个文件: .. code-block:: bash @@ -27,23 +29,26 @@ Transformer语言模型 .. code-block:: python + from nemo.collections.nlp.data.datasets.lm_transformer_dataset import LanguageModelDataDesc data_desc = LanguageModelDataDesc( args.dataset_name, args.data_dir, args.do_lower_case) -我们需要定义我们的分词器, 我们用定义在 ``nemo_nlp/data/tokenizers/word_tokenizer.py`` 中的 `WordTokenizer`: +我们需要定义我们的分词器, 我们用定义在 ``nemo/collections/nlp/data/tokenizers/word_tokenizer.py`` 中的 `WordTokenizer`: .. code-block:: python + import nemo.collections.nlp as nemo_nlp tokenizer = nemo_nlp.WordTokenizer(f"{args.data_dir}/{args.tokenizer_model}") vocab_size = 8 * math.ceil(tokenizer.vocab_size / 8) .. tip:: - 让词嵌入的大小(或者其他张量的维度)能够整除8会帮助得到最好的GPU利用率,以及混精度训练的加速。 + 让词嵌入的大小(或者其他张量的维度)能够整除 8 + 会帮助得到最好的 GPU 利用率,以及混精度训练的加速。 创建模型 ---------------- -首先我们需要用支持的后端来创建 ``neural factory`` 。你如何定义它取决于你想做多 GPU 训练或者是混合精度训练。这个教程假设你不用混合精度,在一块 GPU 上做训练。 -如果你想做混合精度训练,设置 ``amp_opt_level`` 为 ``O1`` 或者 ``O2`` 。 +首先我们需要用支持的后端来创建 ``neural factory`` 。你如何定义它取决于你想做多 GPU 训练或者是混合精度训练。 +这个教程假设你不用混合精度,在一块 GPU 上做训练。如果你想做混合精度训练,设置 ``amp_opt_level`` 为 ``O1`` 或者 ``O2`` 。 .. code-block:: python @@ -62,51 +67,72 @@ Transformer语言模型 .. 
code-block:: python - encoder = nemo_nlp.TransformerEncoderNM(**params) - log_softmax = nemo_nlp.TokenClassifier(**params) - loss = nemo_nlp.PaddedSmoothedCrossEntropyLossNM(**params) - + from nemo.collections.nlp.nm.trainables.common import TokenClassifier + from nemo.collections.nlp.nm.losses import SmoothedCrossEntropyLoss + + encoder = nemo_nlp.nm.trainables.TransformerEncoderNM( + d_model=args.d_model, + d_inner=args.d_inner, + num_layers=args.num_layers, + embedding_dropout=args.embedding_dropout, + num_attn_heads=args.num_attn_heads, + ffn_dropout=args.ffn_dropout, + vocab_size=vocab_size, + mask_future=True, + attn_score_dropout=args.attn_score_dropout, + attn_layer_dropout=args.attn_layer_dropout, + max_seq_length=args.max_seq_length, + ) + + log_softmax = TokenClassifier( + args.d_model, num_classes=vocab_size, num_layers=1, log_softmax=True + ) + + loss = SmoothedCrossEntropyLoss(pad_id=tokenizer.pad_id, label_smoothing=args.label_smoothing) 根据 `Press and Wolf, 2016 `_ :cite:`nlp-lm-press2016using`, 我们也会把词嵌入的参数和 softmax 层连起来: .. code-block:: python - log_softmax.mlp.layers[-1].weight = encoder.embedding_layer.token_embedding.weight - + from nemo.core import WeightShareTransform + log_softmax.tie_weights_with( + encoder, + weight_names=["mlp.layer0.weight"], + name2name_and_transform={ + "mlp.layer0.weight": ("embedding_layer.token_embedding.weight", WeightShareTransform.SAME) + }, + ) -接着,我们为训练和评估创建数据集: +接着,我们创建从输入到输出的管道,用作训练和评估: .. code-block:: python - train_dataset = nemo_nlp.LanguageModelingDataset( - tokenizer, - dataset=f"{args.data_dir}/{args.train_dataset}", - max_sequence_length=args.max_sequence_length, - batch_step=args.max_sequence_length) - - eval_dataset = nemo_nlp.LanguageModelingDataset( - tokenizer, - dataset=f"{args.data_dir}/{args.eval_datasets[0]}", - max_sequence_length=args.max_sequence_length, - batch_step=args.predict_last_k) - - -然后,我们创建用于训练和评估的从输入到输出的管道: + from nemo.collections.nlp.nm.data_layers import LanguageModelingDataLayer - .. code-block:: python - - def create_pipeline(dataset, batch_size): - data_layer = nemo_nlp.LanguageModelingDataLayer(dataset, - batch_size=batch_size) + def create_pipeline( + dataset, max_seq_length=args.max_seq_length, batch_step=args.max_seq_length, batch_size=args.batch_size + ): + data_layer = LanguageModelingDataLayer( + dataset, tokenizer, max_seq_length, batch_size, batch_step + ) src, src_mask, labels = data_layer() src_hiddens = encoder(input_ids=src, input_mask_src=src_mask) logits = log_softmax(hidden_states=src_hiddens) - return loss(logits=logits, target_ids=labels) - - - train_loss = create_pipeline(train_dataset, args.batch_size) - eval_loss = create_pipeline(eval_dataset, args.batch_size) - + return loss(logits=logits, labels=labels) + + + train_loss = create_pipeline( + f"{args.data_dir}/{args.train_dataset}", + args.max_seq_length, + batch_step=args.max_seq_length, + batch_size=args.batch_size, + ) + eval_loss = create_pipeline( + f"{args.data_dir}/{args.eval_dataset}", + args.max_seq_length, + batch_step=args.predict_last_k, + batch_size=args.eval_batch_size, + ) 接下来,我们定义一些必要的回调: @@ -116,32 +142,59 @@ Transformer语言模型 .. code-block:: python - train_callback = nemo.core.SimpleLossLoggerCallback(...) - eval_callback = nemo.core.EvaluatorCallback(...) - ckpt_callback = nemo.core.CheckpointCallback(...) 
+ from nemo.collections.nlp.callbacks.lm_transformer_callback import eval_epochs_done_callback, eval_iter_callback + train_callback = SimpleLossLoggerCallback( + tensors=train_tensors, + print_func=lambda x: logging.info(str(round(x[0].item(), 3))), + tb_writer=nf.tb_writer, + get_tb_values=lambda x: [["loss", x[0]]], + step_freq=steps_per_epoch, + ) + + eval_callback = nemo.core.EvaluatorCallback( + eval_tensors=eval_tensors, + user_iter_callback=lambda x, y: eval_iter_callback(x, y, data_layer), + user_epochs_done_callback=lambda x: eval_epochs_done_callback(x, f'{nf.work_dir}/graphs'), + tb_writer=nf.tb_writer, + eval_step=steps_per_epoch, + ) + + # Create callback to save checkpoints + ckpt_callback = CheckpointCallback( + folder=nf.checkpoint_dir, epoch_freq=args.save_epoch_freq, step_freq=args.save_step_freq + ) 最后,定义优化器,开始训练吧! .. code-block:: python - lr_policy_fn = get_lr_policy(args.lr_policy, - total_steps=args.num_epochs * steps_per_epoch, - warmup_ratio=args.lr_warmup_proportion) - - nf.train(tensors_to_optimize=[train_loss], - callbacks=callbacks, - lr_policy=lr_policy_fn, - batches_per_step=args.iter_per_step, - optimizer=args.optimizer_kind, - optimization_params={"num_epochs": args.num_epochs, - "lr": args.lr, - "weight_decay": args.weight_decay, - "betas": (args.beta1, args.beta2)}) + from nemo.utils.lr_policies import CosineAnnealing + + lr_policy_fn = CosineAnnealing(args.max_steps, warmup_steps=args.warmup_steps) + max_num_epochs = 0 if args.interactive else args.num_epochs + + callbacks = [callback_ckpt] + if not args.interactive: + callbacks.extend([train_callback, eval_callback]) + + nf.train( + tensors_to_optimize=[train_loss], + callbacks=callbacks, + lr_policy=lr_policy_fn, + batches_per_step=args.iter_per_step, + optimizer=args.optimizer_kind, + optimization_params={ + "num_epochs": args.num_epochs, + "lr": args.lr, + "weight_decay": args.weight_decay, + "betas": (args.beta1, args.beta2), + }, + ) 参考 ---- -.. bibliography:: nlp_all.bib +.. bibliography:: nlp_all_refs.bib :style: plain :labelprefix: NLP-LM :keyprefix: nlp-lm- diff --git a/docs/docs_zh/sources/source/speech_command/datasets.rst b/docs/docs_zh/sources/source/speech_command/datasets.rst new file mode 100644 index 000000000000..7c6218ba0a04 --- /dev/null +++ b/docs/docs_zh/sources/source/speech_command/datasets.rst @@ -0,0 +1,38 @@ +数据集 +====== + +.. _GoogleSpeechCommands_dataset: + + +谷歌语音指令数据集 +------------------ + +准确地识别语音指令在很多场景都有应用。为了这一目的,谷歌发布了语音指令数据集 :cite:`speech-recognition-dataset-warden2018speech` 。 +这一数据集涵盖一些指令的短语音,例如,stop, go, up, down 等等。这些语音来自很多不同的说话人。为了推广这个数据集,谷歌还组织了一次 Kaggle 竞赛。在这项竞赛中,最终获胜的队伍取得了91%的分类准确率。 + +我们借助 NeMo 中已有的 ASR 模型进行了测试,并发现效果很好。再加上数据增强技术,准确率可以被进一步提升。 + +版本和预处理 +------------ + +截至目前,谷歌共发布了两个版本的数据集。第一版共包含30个类别共6万5千条数据。第二版包含35个类别共11万条数据。当前我们主要使用第一版数据,以便与其他方法进行比较。 + +脚本 `process_speech_commands_data.py` 可以被用来对数据集进行处理,以便将其转换为合适的格式。 +这个文件位于 `scripts` 文件夹中。你可以设定选项 `--data_root` 来指定数据集的位置,选项 `--data_version` 来指定版本。 + +还有一个选项 `--rebalance` 可以被用来重新平衡数据集。 + +.. code-block:: bash + + python process_speech_commands_data.py --data_root= --data_version=<1 or 2> {--rebalance} + +运行之后,你会得到三个文件: `train_manifest.json` , `validation_manifest.json` 和 `test_manifest.json` +在文件夹 `{data_root}/google_speech_recognition_v{1/2}` 中。 + +参考 +---- + +.. 
bibliography:: speech_recognition_all.bib + :style: plain + :labelprefix: SPEECH-RECOGNITION-DATASET + :keyprefix: speech-recognition-dataset- diff --git a/docs/docs_zh/sources/source/speech_command/intro.rst b/docs/docs_zh/sources/source/speech_command/intro.rst new file mode 100644 index 000000000000..0d6bd78bc809 --- /dev/null +++ b/docs/docs_zh/sources/source/speech_command/intro.rst @@ -0,0 +1,12 @@ +.. _speech-command-docs: + + +语音指令 +======== + +.. toctree:: + :maxdepth: 8 + + tutorial + datasets + models diff --git a/docs/docs_zh/sources/source/speech_command/models.rst b/docs/docs_zh/sources/source/speech_command/models.rst new file mode 100644 index 000000000000..b89103245825 --- /dev/null +++ b/docs/docs_zh/sources/source/speech_command/models.rst @@ -0,0 +1,7 @@ +模型 +==== + +.. toctree:: + :maxdepth: 8 + + quartznet diff --git a/docs/docs_zh/sources/source/speech_command/quartz_vertical.png b/docs/docs_zh/sources/source/speech_command/quartz_vertical.png new file mode 100644 index 000000000000..39ef7534c783 Binary files /dev/null and b/docs/docs_zh/sources/source/speech_command/quartz_vertical.png differ diff --git a/docs/docs_zh/sources/source/speech_command/quartznet.rst b/docs/docs_zh/sources/source/speech_command/quartznet.rst new file mode 100644 index 000000000000..9f99f848710a --- /dev/null +++ b/docs/docs_zh/sources/source/speech_command/quartznet.rst @@ -0,0 +1,32 @@ +QuartzNet +--------- + +QuartzNet 模型相当于使用了可分离卷积和更大卷积核的 Jasper 模型 :cite:`asr-models-li2019jasper` 。两个模型都能达到相似的准确率,但是 QuatzNet 模型的参数量要少一个数量级。 +与 Jasper 模型类似,QuartzNet 模型规格使用 QuartzNet_[BxR] 来表示,其中 B 表示模块的数量,R 表示卷积子模块的数量。 + +我们使用这些模型在谷歌语音指令数据集上进行训练。 + +.. image:: quartz_vertical.png + :align: center + :alt: quartznet model + +关于 QuartzNet 模型的详细信息可以参阅 `QuartzNet `_ 。 + +我们使用2个 GPU 进行了200 epochs 的混合精度训练,其中,batch size 设为128。整个训练大概需要1个小时。 + +=============================== ===================== ============ +Network Dataset Results +=============================== ===================== ============ +QuartzNet3x1 (77k params) Speech Commands V1 97.46% Test + +QuartzNet3x2 (93k params) Speech Commands V2 97.35% Test +=============================== ===================== ============ + + +参考 +---- + +.. 
bibliography:: speech_recognition_all.bib + :style: plain + :labelprefix: SPEECH-RECOGNITION-MODELS + :keyprefix: speech-recognition-models- diff --git a/docs/docs_zh/sources/source/speech_command/speech_recognition_all.bib b/docs/docs_zh/sources/source/speech_command/speech_recognition_all.bib new file mode 100644 index 000000000000..277e56e7ec9b --- /dev/null +++ b/docs/docs_zh/sources/source/speech_command/speech_recognition_all.bib @@ -0,0 +1,43 @@ + +@inproceedings{hu2018squeeze, + title={Squeeze-and-excitation networks}, + author={Hu, Jie and Shen, Li and Sun, Gang}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={7132--7141}, + year={2018} +} + +@article{karim2019multivariate, + title={Multivariate lstm-fcns for time series classification}, + author={Karim, Fazle and Majumdar, Somshubra and Darabi, Houshang and Harford, Samuel}, + journal={Neural Networks}, + volume={116}, + pages={237--245}, + year={2019}, + publisher={Elsevier} +} + +@article{warden2018speech, + title={Speech commands: A dataset for limited-vocabulary speech recognition}, + author={Warden, Pete}, + journal={arXiv preprint arXiv:1804.03209}, + year={2018} +} + +@article{kriman2019quartznet, + title={Quartznet: Deep automatic speech recognition with 1d time-channel separable convolutions}, + author={Kriman, Samuel and Beliaev, Stanislav and Ginsburg, Boris and Huang, Jocelyn and Kuchaiev, Oleksii and Lavrukhin, Vitaly and Leary, Ryan and Li, Jason and Zhang, Yang}, + journal={arXiv preprint arXiv:1910.10261}, + year={2019} +} + +@article{park2019, + author = {{Park}, Daniel S. and {Chan}, William and {Zhang}, Yu and + {Chiu}, Chung-Cheng and {Zoph}, Barret and {Cubuk}, Ekin D. and + {Le}, Quoc V.}, + title = "{SpecAugment: A Simple Data Augmentation Method for Automatic Speech Recognition}", + journal = {arXiv e-prints}, + year = "2019", + eid = {arXiv:1904.08779}, + eprint = {1904.08779}, +} \ No newline at end of file diff --git a/docs/docs_zh/sources/source/speech_command/tutorial.rst b/docs/docs_zh/sources/source/speech_command/tutorial.rst new file mode 100644 index 000000000000..0a901d6849c7 --- /dev/null +++ b/docs/docs_zh/sources/source/speech_command/tutorial.rst @@ -0,0 +1,630 @@ +教程 +==== + +请首先安装 ``nemo`` 和 ``nemo_asr`` 集合。 +具体的安装步骤请参阅 :ref:`installation` 章节。 +另外,本教程还使用 python 包 `torchaudio` 进行语音特征提取。 + + +入门 +---- + +本教程基于 QuartzNet :cite:`speech-recognition-tut-kriman2019quartznet` 模型。其中的解码器部分做了些许修改以适配分类任务。 + +1. 音频处理(特征提取):包括信号归一化,滑窗处理,频谱转换(或者是梅尔频谱 MFCC ) +2. 使用 SpecAugment :cite:`speech-recognition-tut-park2019` 进行数据增强,同时这一方法也能增加数据量。 +3. 创建一个小型的神经网络模型进行训练。 + +数据准备 +-------- + +我们使用开源的谷歌语音指令数据集 Google Speech Commands Dataset 。目前,我们使用的是第一版数据集。如果想使用第二版,还需要一些简单的修改。下面的命令可以下载数据并进行相应的格式转换。 + +.. code-block:: bash + + mkdir data + # process_speech_commands_data.py script is located under /scripts + # The `--rebalance` flag will duplicate elements in the train set so that all classes + # have the same number of elements. It is not mandatory to add this flag. + python process_speech_commands_data.py --data_root=data --data_version=1 --rebalance + +.. 
note:: + 如果使用第一版数据集 ``--data_version=1`` ,至少需要 4GB 的硬盘空间。如果使用第二版 ``--data_version=2`` 至少需要 16 GB 的硬盘空间。另外,下载和处理的过程均需要一些时间。 + +下载和处理完成后,你会得到一个名为 `data` 文件夹,其中包含另一个文件夹名称为 `google_speech_recognition_v{1/2}` 。 +在这个文件夹中会有多个子目录包含很多 wav 文件和三个 json 文件,分别是 + +* `train_manifest.json` +* `validation_manifest.json` +* `test_manifest.json` + +json文件的每一行代表一条训练数据。其中, `audio_filepath` 属性是音频文件的地址, `duration` 是音频时间长度, `command` 是音频的标注。 + +.. code-block:: json + + {"audio_filepath": "/two/8aa35b0c_nohash_0.wav", "duration": 1.0, "command": "two"} + {"audio_filepath": "/two/ec5ab5d5_nohash_2.wav", "duration": 1.0, "command": "two"} + + +训练 +---- + +我们使用的是 QuartzNet 模型 :cite:`speech-recognition-tut-kriman2019quartznet` 。 +相比于 Jasper 模型, QuartzNet 模型中使用了可分离卷积 (Separable Convolutions) ,大幅减少了参数数量。 + +QuartzNet 模型使用一种固定的模型定义模式: QuartzNet-[BxR], 其中 B 是模块的数量,R 是卷积子模块的数量。每个子模块包含一个 1D 掩码卷积,批归一化, ReLU 激活和 dropout 。 + + .. image:: quartz_vertical.png + :align: center + :alt: quartznet model + +本教程中我们使用的是 QuartzNet [3x1] 模型。 +接下来的脚本会在一个 GPU 上进行训练和评估。 + + .. tip:: + 借助 Jupyter 笔记本一步一步地运行这个脚本。 + +**训练脚本** + +.. code-block:: python + + # Import some utility functions + import argparse + import copy + import math + import os + import glob + from functools import partial + from datetime import datetime + from ruamel.yaml import YAML + + # NeMo's "core" package + import nemo + # NeMo's ASR collection + import nemo.collections.asr as nemo_asr + # NeMo's learning rate policy + from nemo.utils.lr_policies import CosineAnnealing + from nemo.collections.asr.helpers import ( + monitor_classification_training_progress, + process_classification_evaluation_batch, + process_classification_evaluation_epoch, + ) + + from nemo.utils import logging + + # Lets define some hyper parameters + lr = 0.05 + num_epochs = 100 + batch_size = 128 + weight_decay = 0.001 + + # Create a Neural Factory + # It creates log files and tensorboard writers for us among other functions + neural_factory = nemo.core.NeuralModuleFactory( + log_dir='./quartznet-3x1-v1', + create_tb_writer=True) + tb_writer = neural_factory.tb_writer + + # Path to our training manifest + train_dataset = "/train_manifest.json" + + # Path to our validation manifest + eval_datasets = "/test_manifest.json" + + # Here we will be using separable convolutions + # with 3 blocks (k=3 repeated once r=1 from the picture above) + yaml = YAML(typ="safe") + with open("/examples/asr/configs/quartznet_speech_commands_3x1_v1.yaml") as f: + jasper_params = yaml.load(f) + + # Pre-define a set of labels that this model must learn to predict + labels = jasper_params['labels'] + + # Get the sampling rate of the data + sample_rate = jasper_params['sample_rate'] + + # Check if data augmentation such as white noise and time shift augmentation should be used + audio_augmentor = jasper_params.get('AudioAugmentor', None) + + # Build the input data layer and the preprocessing layers for the train set + train_data_layer = nemo_asr.AudioToSpeechLabelDataLayer( + manifest_filepath=train_dataset, + labels=labels, + sample_rate=sample_rate, + batch_size=batch_size, + num_workers=os.cpu_count(), + augmentor=audio_augmentor, + shuffle=True + ) + + # Build the input data layer and the preprocessing layers for the test set + eval_data_layer = nemo_asr.AudioToSpeechLabelDataLayer( + manifest_filepath=eval_datasets, + sample_rate=sample_rate, + labels=labels, + batch_size=args.eval_batch_size, + num_workers=os.cpu_count(), + shuffle=False, + ) + + # We will convert the raw audio data into MFCC Features to feed as input to 
our model + data_preprocessor = nemo_asr.AudioToMFCCPreprocessor( + sample_rate=sample_rate, **jasper_params["AudioToMFCCPreprocessor"], + ) + + # Compute the total number of samples and the number of training steps per epoch + N = len(train_data_layer) + steps_per_epoch = math.ceil(N / float(args.batch_size)) + + logging.info("Steps per epoch : {0}".format(steps_per_epoch)) + logging.info('Have {0} examples to train on.'.format(N)) + + # Here we begin defining all of the augmentations we want + # We will pad the preprocessed spectrogram image to have a certain number of timesteps + # This centers the generated spectrogram and adds black boundaries to either side + # of the padded image. + crop_pad_augmentation = nemo_asr.CropOrPadSpectrogramAugmentation(audio_length=128) + + # We also optionally add `SpecAugment` augmentations based on the config file + # SpecAugment has various possible augmentations to the generated spectrogram + # 1) Frequency band masking + # 2) Time band masking + # 3) Rectangular cutout + spectr_augment_config = jasper_params.get('SpectrogramAugmentation', None) + if spectr_augment_config: + data_spectr_augmentation = nemo_asr.SpectrogramAugmentation(**spectr_augment_config) + + # Build the QuartzNet Encoder model + # The config defines the layers as a list of dictionaries + # The first and last two blocks are not considered when we say QuartzNet-[BxR] + # B is counted as the number of blocks after the first layer and before the penultimate layer. + # R is defined as the number of repetitions of each block in B. + # Note: We can scale the convolution kernels size by the float parameter `kernel_size_factor` + jasper_encoder = nemo_asr.JasperEncoder(**jasper_params["JasperEncoder"]) + + # We then define the QuartzNet decoder. + # This decoder head is specialized for the task for classification, such that it + # accepts a set of `N-feat` per timestep of the model, and averages these features + # over all the timesteps, before passing a Linear classification layer on those features. + jasper_decoder = nemo_asr.JasperDecoderForClassification( + feat_in=jasper_params["JasperEncoder"]["jasper"][-1]["filters"], + num_classes=len(labels), + **jasper_params['JasperDecoderForClassification'], + ) + + # We can easily apply cross entropy loss to train this model + ce_loss = nemo_asr.CrossEntropyLossNM() + + # Lets print out the number of parameters of this model + logging.info('================================') + logging.info(f"Number of parameters in encoder: {jasper_encoder.num_weights}") + logging.info(f"Number of parameters in decoder: {jasper_decoder.num_weights}") + logging.info( + f"Total number of parameters in model: " f"{jasper_decoder.num_weights + jasper_encoder.num_weights}" + ) + logging.info('================================') + + # Now we have all of the components that are required to build the NeMo execution graph! 
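+    # Note: "calling" the neural modules below (e.g. train_data_layer(), jasper_encoder(...))
+    # does not execute anything yet. Each call only wires the module's output tensors into
+    # the execution graph; the actual computation is launched later by neural_factory.train(...).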
+ ## Build the training data loaders and preprocessors first + audio_signal, audio_signal_len, commands, command_len = train_data_layer() + processed_signal, processed_signal_len = data_preprocessor(input_signal=audio_signal, length=audio_signal_len) + processed_signal, processed_signal_len = crop_pad_augmentation( + input_signal=processed_signal, + length=audio_signal_len + ) + + ## Augment the dataset for training + if spectr_augment_config: + processed_signal = data_spectr_augmentation(input_spec=processed_signal) + + ## Define the model + encoded, encoded_len = jasper_encoder(audio_signal=processed_signal, length=processed_signal_len) + decoded = jasper_decoder(encoder_output=encoded) + + ## Obtain the train loss + train_loss = ce_loss(logits=decoded, labels=commands) + + # Now we build the test graph in a similar way, reusing the above components + ## Build the test data loader and preprocess same way as train graph + ## But note, we do not add the spectrogram augmentation to the test graph ! + test_audio_signal, test_audio_signal_len, test_commands, test_command_len = eval_data_layer() + test_processed_signal, test_processed_signal_len = data_preprocessor( + input_signal=test_audio_signal, length=test_audio_signal_len + ) + test_processed_signal, test_processed_signal_len = crop_pad_augmentation( + input_signal=test_processed_signal, length=test_processed_signal_len + ) + + # Pass the test data through the model encoder and decoder + test_encoded, test_encoded_len = jasper_encoder( + audio_signal=test_processed_signal, length=test_processed_signal_len + ) + test_decoded = jasper_decoder(encoder_output=test_encoded) + + # Compute test loss for visualization + test_loss = ce_loss(logits=test_decoded, labels=test_commands) + + # Now that we have our training and evaluation graphs built, + # we can focus on a few callbacks to help us save the model checkpoints + # during training, as well as display train and test metrics + + # Callbacks needed to print train info to console and Tensorboard + train_callback = nemo.core.SimpleLossLoggerCallback( + # Notice that we pass in loss, predictions, and the labels. + # Of course we would like to see our training loss, but we need the + # other arguments to calculate the accuracy. + tensors=[train_loss, decoded, commands], + # The print_func defines what gets printed. 
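+        # monitor_classification_training_progress consumes the (loss, logits, labels)
+        # tensors listed above and reports both the training loss and the batch accuracy.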
+ print_func=partial(monitor_classification_training_progress, eval_metric=None), + get_tb_values=lambda x: [("loss", x[0])], + tb_writer=neural_factory.tb_writer, + ) + + # Callbacks needed to print test info to console and Tensorboard + tagname = 'TestSet' + eval_callback = nemo.core.EvaluatorCallback( + eval_tensors=[test_loss, test_decoded, test_commands], + user_iter_callback=partial(process_classification_evaluation_batch, top_k=1), + user_epochs_done_callback=partial(process_classification_evaluation_epoch, eval_metric=1, tag=tagname), + eval_step=200, # How often we evaluate the model on the test set + tb_writer=neural_factory.tb_writer, + ) + + # Callback to save model checkpoints + chpt_callback = nemo.core.CheckpointCallback( + folder=neural_factory.checkpoint_dir, + step_freq=1000, + ) + + # Prepare a list of checkpoints to pass to the engine + callbacks = [train_callback, eval_callback, chpt_callback] + + # Now we have all the components required to train the model + # Lets define a learning rate schedule + + # Define a learning rate schedule + lr_policy = CosineAnnealing( + total_steps=num_epochs * steps_per_epoch, + warmup_ratio=0.05, + min_lr=0.001, + ) + + logging.info(f"Using `{lr_policy}` Learning Rate Scheduler") + + # Finally, lets train this model ! + neural_factory.train( + tensors_to_optimize=[train_loss], + callbacks=callbacks, + lr_policy=lr_policy, + optimizer="novograd", + optimization_params={ + "num_epochs": num_epochs, + "max_steps": None, + "lr": lr, + "momentum": 0.95, + "betas": (0.98, 0.5), + "weight_decay": weight_decay, + "grad_norm_clip": None, + }, + batches_per_step=1, + ) + +.. note:: + 整个训练过程大概需要 100 个 epoch ,在 GTX 1080 GPU 上大概需要 4-5 小时。 + +.. tip:: + 想要进一步提升准确率,可以尝试下列方法: + (1) 更长时间的训练 (200-300 epochs) + (2) 使用更多的数据 + (3) 选择更大的模型 + (4) 使用多个 GPU 或者使用混合精度训练 + (5) 使用一个预训练的模型 + +混合精度训练 +------------ + +可以借助英伟达的 `APEX 工具包 `_ 进行混合精度训练和分布式训练。 +要进行混合精度训练,你只需要设置 `optimization_level` 选项为 `nemo.core.Optimization.mxprO1` 。例如: + +.. code-block:: python + + nf = nemo.core.NeuralModuleFactory( + backend=nemo.core.Backend.PyTorch, + local_rank=args.local_rank, + optimization_level=nemo.core.Optimization.mxprO1, + placement=nemo.core.DeviceType.AllGpu, + cudnn_benchmark=True) + + +多 GPU 训练 +----------- + +在 NeMo 中进行多 GPU 训练也非常容易: + + (1) 将 `NeuralModuleFactory` 类的 `placement` 选项设置为 `nemo.core.DeviceType.AllGpu` + (2) 添加命令行选项 `local_rank` : `parser.add_argument("--local_rank", default=None, type=int)` + (3) 导入 `torch.distributed.launch` 包并且使用如下的方式运行脚本: + +.. code-block:: bash + + python -m torch.distributed.launch --nproc_per_node= /examples/asr/quartznet_speech_commands.py ... + +.. note:: + 混合精度训练依赖于 Tensor Cores 硬件单元,所以当前只支持英伟达 Volta 和 Turing 架构 GPU + + +完整的训练案例 +~~~~~~~~~~~~~~ + +更详细的一个训练案例请参阅文件 `/examples/asr/quartznet_speech_commands.py` 。 +在这个案例中,我们分别构建了训练,评估和测试的计算图。 +下面的这条命令会启动8个 GPU 并进行混合精度训练。其中的 json 文件指定了数据集信息。 + +.. 
code-block:: bash + + python -m torch.distributed.launch --nproc_per_node= /examples/asr/quartznet_speech_commands.py --model_config "/examples/asr/configs/quartznet_speech_commands_3x1_v1.yaml" \ + --train_dataset="/train_manifest.json" --eval_datasets "/validation_manifest.json" "/test_manifest.json" \ + --num_epochs=200 --batch_size=128 --eval_batch_size=128 --eval_freq=200 --lr=0.05 --min_lr=0.001 \ + --optimizer="novograd" --weight_decay=0.001 --amp_opt_level="O1" --warmup_ratio=0.05 --hold_ratio=0.45 \ + --checkpoint_dir="./checkpoints/quartznet_speech_commands_checkpoints_3x1_v1/" \ + --exp_name="./results/quartznet_speech_classification-quartznet-3x1_v1/" + +.. tip:: + 你还可以同时输入多个 json 文件,以便在多个数据集上进行训练。例如: `--train_manifest=/manifests/.json,/manifests/.json` + + +微调 (Fine-tuning) +------------------ + +如果使用一个预训练好的模型,那么训练时间可以被大大缩短: +1. 准备一个预训练模型,包含 jasper_encoder, jasper_decoder 和配置文件。 +2. 载入模型权重,使用类似于下面这样的代码: + +.. code-block:: python + + jasper_encoder.restore_from("/JasperEncoder-STEP-89000.pt") + jasper_decoder.restore_from("/JasperDecoderForClassification-STEP-89000.pt") + # in case of distributed training add args.local_rank + jasper_decoder.restore_from("/JasperDecoderForClassification-STEP-89000.pt", args.local_rank) + +.. tip:: + 微调的时候,最好降低学习率。 + + +评估 +---- + +我们可以下载预训练模型,并用它在谷歌语音指令数据集上检验分类准确率。 + +.. note:: + 如果你想亲自听一下数据集中的音频,你可以在 notebook 里运行下面的这份代码。 + +.. code-block:: python + + # Lets add some generic imports. + # Please note that you will need to install `librosa` for this code + # To install librosa : Run `!pip install librosa` from the notebook itself. + import glob + import os + import json + import re + import numpy as np + import torch + import librosa + import librosa.display + import matplotlib.pyplot as plt + import IPython.display as ipd + from ruamel.yaml import YAML + + # Import nemo and asr collections + import nemo + import nemo.collections.asr as nemo_asr + + from nemo.utils import logging + + # We add some + data_dir = '' + data_version = 1 + config_path = '' + model_path = '' + + test_manifest = os.path.join(data_dir, "test_manifest.json") + + # Parse the config file provided to us + # Parse config and pass to model building function + yaml = YAML(typ='safe') + with open(config_path) as f: + params = yaml.load(f) + logging.info("******\nLoaded config file.\n******") + + labels = params['labels'] # Vocab of tokens + sample_rate = params['sample_rate'] + batch_size = 128 + + # Build the evaluation graph + # Create our NeuralModuleFactory, which will oversee the neural modules. 
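+    # Only `log_dir` is specified here; the other factory options are left at their
+    # defaults, which is sufficient for the single-GPU inference performed below.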
+ neural_factory = nemo.core.NeuralModuleFactory( + log_dir=f'v{data_version}/eval_results/') + + logger = neural_factory.logger + + test_data_layer = nemo_asr.AudioToSpeechLabelDataLayer( + manifest_filepath=test_manifest, + labels=labels, + sample_rate=sample_rate, + shuffle=False, + batch_size=batch_size, + ) + crop_pad_augmentation = nemo_asr.CropOrPadSpectrogramAugmentation( + audio_length=128 + ) + data_preprocessor = nemo_asr.AudioToMFCCPreprocessor( + sample_rate=sample_rate, + **params['AudioToMFCCPreprocessor'] + ) + + # Create the Jasper_3x1 encoder as specified, and a classification decoder + encoder = nemo_asr.JasperEncoder(**params['JasperEncoder']) + decoder = nemo_asr.JasperDecoderForClassification( + feat_in=params['JasperEncoder']['jasper'][-1]['filters'], + num_classes=len(labels), + **params['JasperDecoderForClassification'] + ) + + ce_loss = nemo_asr.CrossEntropyLossNM() + + # Assemble the DAG components + test_audio_signal, test_audio_signal_len, test_commands, test_command_len = test_data_layer() + + test_processed_signal, test_processed_signal_len = data_preprocessor( + input_signal=test_audio_signal, + length=test_audio_signal_len + ) + + # --- Crop And Pad Augment --- # + test_processed_signal, test_processed_signal_len = crop_pad_augmentation( + input_signal=test_processed_signal, + length=test_processed_signal_len + ) + + test_encoded, test_encoded_len = encoder( + audio_signal=test_processed_signal, + length=test_processed_signal_len + ) + + test_decoded = decoder( + encoder_output=test_encoded + ) + + test_loss = ce_loss( + logits=test_decoded, + labels=test_commands + ) + + # We import the classification accuracy metric to compute Top-1 accuracy + from nemo.collections.asr.metrics import classification_accuracy + from functools import partial + + # --- Inference Only --- # + # We've already built the inference DAG above, so all we need is to call infer(). + evaluated_tensors = neural_factory.infer( + # These are the tensors we want to get from the model. + tensors=[test_loss, test_decoded, test_commands], + # checkpoint_dir specifies where the model params are loaded from. + checkpoint_dir=model_path + ) + + # Let us count the total number of incorrect classifications by this model + correct_count = 0 + total_count = 0 + + for batch_idx, (logits, labels) in enumerate(zip(evaluated_tensors[1], evaluated_tensors[2])): + acc = classification_accuracy( + logits=logits, + targets=labels, + top_k=[1] + ) + + # Select top 1 accuracy only + acc = acc[0] + + # Since accuracy here is "per batch", we simply denormalize it by multiplying + # by batch size to recover the count of correct samples. 
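+        # For example, a batch accuracy of 0.75 over 128 samples contributes
+        # int(0.75 * 128) = 96 correct predictions to the running total.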
+ correct_count += int(acc * logits.size(0)) + total_count += logits.size(0) + + logging.info(f"Total correct / Total count : {correct_count} / {total_count}") + logging.info(f"Final accuracy : {correct_count / float(total_count)}") + + # Let us now filter out the incorrectly labeled samples from the total set of samples in the test set + + # First lets create a utility class to remap the integer class labels to actual string label + class ReverseMapLabel: + def __init__(self, data_layer: nemo_asr.AudioToSpeechLabelDataLayer): + self.label2id = dict(data_layer._dataset.label2id) + self.id2label = dict(data_layer._dataset.id2label) + + def __call__(self, pred_idx, label_idx): + return self.id2label[pred_idx], self.id2label[label_idx] + + # Next, lets get the indices of all the incorrectly labeled samples + sample_idx = 0 + incorrect_preds = [] + rev_map = ReverseMapLabel(test_data_layer) + + for batch_idx, (logits, labels) in enumerate(zip(evaluated_tensors[1], evaluated_tensors[2])): + probs = torch.softmax(logits, dim=-1) + probas, preds = torch.max(probs, dim=-1) + + incorrect_ids = (preds != labels).nonzero() + for idx in incorrect_ids: + proba = float(probas[idx][0]) + pred = int(preds[idx][0]) + label = int(labels[idx][0]) + idx = int(idx[0]) + sample_idx + + incorrect_preds.append((idx, *rev_map(pred, label), proba)) + + sample_idx += labels.size(0) + + logging.info(f"Num test samples : {total_count}") + logging.info(f"Num errors : {len(incorrect_preds)}") + + # First lets sort by confidence of prediction + incorrect_preds = sorted(incorrect_preds, key=lambda x: x[-1], reverse=False) + + # Lets print out the (test id, predicted label, ground truth label, confidence) + # tuple of first 20 incorrectly labeled samples + for incorrect_sample in incorrect_preds[:20]: + logging.info(str(incorrect_sample)) + + # Lets define a threshold below which we designate a model's prediction as "low confidence" + # and then filter out how many such samples exist + low_confidence_threshold = 0.25 + count_low_confidence = len(list(filter(lambda x: x[-1] <= low_confidence_threshold, incorrect_preds))) + logging.info(f"Number of low confidence predictions : {count_low_confidence}") + + # One interesting observation is to actually listen to these samples whose predicted labels were incorrect + # Note: The following requires the use of a Notebook environment + + # First lets create a helper function to parse the manifest files + def parse_manifest(manifest): + data = [] + for line in manifest: + line = json.loads(line) + data.append(line) + + return data + + # Now lets load the test manifest into memory + test_samples = [] + with open(test_manifest, 'r') as test_f: + test_samples = test_f.readlines() + + test_samples = parse_manifest(test_samples) + + # Next, lets create a helper function to actually listen to certain samples + def listen_to_file(sample_id, pred=None, label=None, proba=None): + # Load the audio waveform using librosa + filepath = test_samples[sample_id]['audio_filepath'] + audio, sample_rate = librosa.load(filepath) + + if pred is not None and label is not None and proba is not None: + logging.info(f"Sample : {sample_id} Prediction : {pred} Label : {label} Confidence = {proba: 0.4f}") + else: + logging.info(f"Sample : {sample_id}") + + return ipd.Audio(audio, rate=sample_rate) + + # Finally, lets listen to all the audio samples where the model made a mistake + # Note: This list of incorrect samples may be quite large, so you may choose to subsample `incorrect_preds` + for sample_id, pred, 
label, proba in incorrect_preds: + ipd.display(listen_to_file(sample_id, pred=pred, label=label, proba=proba)) # Needs to be run in a notebook environment + +参考 +---- + +.. bibliography:: speech_recognition_all.bib + :style: plain + :labelprefix: SPEECH-RECOGNITION-ALL-TUT + :keyprefix: speech-recognition-tut- diff --git a/docs/docs_zh/sources/source/training.rst b/docs/docs_zh/sources/source/training.rst index b7c8c257d9aa..10037571381f 100644 --- a/docs/docs_zh/sources/source/training.rst +++ b/docs/docs_zh/sources/source/training.rst @@ -3,10 +3,11 @@ 训练较大的模型,特别是从头开始训练,需要巨大的算力。NeMo 支持分布式训练和混合精度训练以加速训练。NeMo 借助 `英伟达的 APEX 库 `_ 在英伟达 GPU 上达到最佳的性能。另外,配备了多块 GPU 的系统(例如 DGX Station, DGX-1 & DGX-2 等),可以进一步地使用 *NVLINK* 加速 GPU 间的通信,从而最大限度地发挥 GPU 的性能。 + 混合精度训练 ~~~~~~~~~~~~ - -在英伟达最新的 Volta 和 Turning 架构中,GPU 配备了 Tensor Cores 计算单元,能够大幅加速半精度浮点数的矩阵乘法运算。想要在 NeMo 中使用混合精度训练,你可以设置 `nemo.core.NeuralModuleFactory` 类的 ``optimization_level`` 选项为 ``nemo.core.Optimization.mxprO1`` 。 +在英伟达最新的 Volta 和 Turning 架构中,GPU 配备了 Tensor Cores 计算单元,能够大幅加速半精度浮点数的矩阵乘法运算。 +想要在 NeMo 中使用混合精度训练,你可以设置 `nemo.core.NeuralModuleFactory` 类的 ``optimization_level`` 选项为 ``nemo.core.Optimization.mxprO1`` 。 .. code-block:: python @@ -21,38 +22,68 @@ 进行多 GPU 训练需要进行如下设置: -(1) 在 ``NeuralModuleFactory`` 类中设置选项 ``placement`` 为 ``nemo.core.DeviceType.AllGpu`` -(2) 在你的 python 脚本中添加命令行选项 ``local_rank``: ``parser.add_argument("--local_rank", default=None, type=int)`` +在你的 python 脚本中添加命令行选项 ``local_rank``: ``parser.add_argument("--local_rank", default=os.getenv('LOCAL_RANK', None), type=int)`` .. code-block:: python nf = nemo.core.NeuralModuleFactory( - placement=nemo.core.DeviceType.AllGpu, local_rank=args.local_rank) -利用 PyTorch 中的 `torch.distributed.launch` 包来启动训练: +利用 PyTorch 中的 `torch.distributed.launch` 包运行脚本(假设8块GPU): .. code-block:: bash - python -m torch.distributed.launch --nproc_per_node=8 /examples/asr/jasper.py --num_gpus=8 ... - + python -m torch.distributed.launch --nproc_per_node=8 /examples/asr/jasper.py ... 范例 ~~~~ -一个比较完整的利用 NeMo 训练 ASR 模型的范例,请参阅这个文件: `/examples/asr/jasper.py` 。 这个例子会创建一个训练有向无环图和三个验证有向无环图,以便在不同的数据集上对模型精度进行验证。 +一个比较完整的利用 NeMo 训练 ASR 模型的范例,请参阅这个文件: `/examples/asr/jasper.py` 。 +这个例子会创建一个训练有向无环图和三个验证集上的有向无环图,以便在不同的数据集上对模型进行验证。 在一台配备了多块 Volta GPU 的系统上,你可以用如下的命令来开始训练: .. code-block:: bash - python -m torch.distributed.launch --nproc_per_node=8 /examples/asr/jasper.py --batch_size=64 --num_gpus=8 --num_epochs=100 --lr=0.015 --warmup_steps=8000 --weight_decay=0.001 --train_manifest=/manifests/librivox-train-all.json --val_manifest1=/manifests/librivox-dev-clean.json --val_manifest2=/manifests/librivox-dev-other.json --model_config=/nemo/examples/asr/configs/jasper15x5SEP.yaml --exp_name=MyLARGE-ASR-EXPERIMENT + python -m torch.distributed.launch --nproc_per_node=8 /examples/asr/jasper.py --batch_size=64 --num_epochs=100 --lr=0.015 --warmup_steps=8000 --weight_decay=0.001 --train_manifest=/manifests/librivox-train-all.json --val_manifest1=/manifests/librivox-dev-clean.json --val_manifest2=/manifests/librivox-dev-other.json --model_config=/nemo/examples/asr/configs/jasper15x5SEP.yaml --exp_name=MyLARGE-ASR-EXPERIMENT -这条命令会进行8卡并行和混合精度训练,在上面的命令中,不同的列表文件(.json)指的是不同的数据集。你可以用自己的数据集来代替它们。 +这条命令会触发8卡并行和混合精度训练,在上面的命令中,不同的列表文件(.json)指的是不同的数据集。你可以用自己的数据集来代替它们。 .. 
tip::
-    你可以在选项中同时传入多个数据集,使用逗号隔开,例如:
-    ``--train_manifest=/manifests/librivox-train-all.json,/manifests/librivox-train-all-sp10pcnt.json,/manifests/cv/validated.json``
+    你可以在选项中同时传入多个数据集,使用逗号隔开,例如:``--train_manifest=/manifests/librivox-train-all.json,/manifests/librivox-train-all-sp10pcnt.json,/manifests/cv/validated.json``
+
+这个例子会在三个数据集上进行训练,LibriSpeech, Mozilla Common Voice 和 Librispeech 做了速度扰动后的数据集。
+
+多节点训练
+~~~~~~~~~~
+我们强烈建议在进行多节点训练前,先阅读 pytorch 的分布式文档。这里是一个使用 TCP 初始化进行多节点训练的方法。
+假设我们有两台机子,每台4张卡。
+我们把机子1当主节点(master)。我们需要主节点的 IP 地址,以及它上面的一个空闲端口。
+在机子1上,我们运行:
+
+.. code-block:: bash
+
+    python -m torch.distributed.launch --nproc_per_node=4 --nnodes=2 --node_rank=0 --master_addr= --master_port= jasper.py ...
+
+在机子2上,运行:
 
-这个例子会在三个数据集上进行训练,LibriSpeech, Mozzila Common Voice 和 Librispeech做了速度扰动后的数据集。
+.. code-block:: bash
+
+    python -m torch.distributed.launch --nproc_per_node=4 --nnodes=2 --node_rank=1 --master_addr= --master_port= jasper.py ...
+
+.. tip::
+    设置环境变量 NCCL_DEBUG 为 INFO 来发现启动时候的问题
+
+.. tip::
+    我们推荐阅读下面的 pytorch 文档
+    https://pytorch.org/docs/stable/distributed.html#launch-utility
+    https://github.com/pytorch/pytorch/blob/master/torch/distributed/launch.py
+
+.. tip::
+    关于多进程, neural_factory 包含了两个属性 ``local_rank`` 和 ``global_rank``。
+    ``local_rank`` 指的是当前机子上的 rank, 而 ``global_rank`` 指的是所有机子上的 rank。
+    比如, 假设你有2台机子,每台4张GPU。 global_rank 0 指的是 local_rank 0 并且是第一台机子的
+    第一张GPU, 而 global_rank 4 则是 local_rank 0 并且是第二台机子的第一张卡。换句话说
+    local_rank == 0 并且 global_rank == 0 确保了它占有主节点上的第一张卡; local_rank == 0
+    且 global_rank != 0 确保它占有从节点上的第一张卡。
diff --git a/docs/docs_zh/sources/source/tts/fastspeech.rst b/docs/docs_zh/sources/source/tts/fastspeech.rst
new file mode 100644
index 000000000000..d5fa058a9462
--- /dev/null
+++ b/docs/docs_zh/sources/source/tts/fastspeech.rst
@@ -0,0 +1,37 @@
+.. _fastspeech:
+
+Fast Speech
+===========
+
+模型
+----
+这个模型基于
+`Fast Speech 模型 `_
+(另可见 `此文献 `_)。
+
+Fast Speech 包含两个不同的阶段:持续时间(durations)抽取 和 实际训练。
+
+持续时间抽取
+++++++++++++
+
+第一个阶段是持续时间的抽取,首先,对于每一个输入数据集中的字符,你应该获得一个表示其持续时长的整型数值,该数值对应着音频样本中该字符持续的时间步数量。
+对此,NeMo 使用从 Tacotron 2 推理时提取的输入字符与梅尔谱的对齐映射矩阵(alignment map)来指导训练。
+对每一个时间步,我们都将该时间步在对齐映射矩阵中最强信号值对应的字符的持续时间增加一个单位。
+
+想要完成以上步骤,请运行位于 NeMo/examples/tts 的 fastspeech_durations.py 文件,并指定以下参数(提供存储 durations 的路径):
+
+.. code-block:: bash
+
+    python fastspeech_durations.py --spec_model=tacotron2 --spec_model_config=configs/tacotron2.yaml --spec_model_load_dir= --eval_dataset=/ljspeech_train.json --durations_dir=/durs
+
+Fast Speech 训练
+++++++++++++++++
+
+第二个阶段是实际模型的训练。 NeMo 将 fast speech 中所有 梅尔谱合成 以及 持续时间计算 的逻辑都打包在一个对应名称的神经模块中。
+FastSpeechLoss 会根据其输出计算损失值。
+
+要使用上一步骤中抽取的 ljspeech 数据的持续时间来开始训练,请执行以下命令:
+
+.. 
code-block:: bash + + python fastspeech.py --model_config=configs/fastspeech.yaml --train_dataset=/ljspeech_train.json --durations_dir=/durs diff --git a/docs/docs_zh/sources/source/tts/models.rst b/docs/docs_zh/sources/source/tts/models.rst index cf8b4f36ecbc..3f4053739614 100644 --- a/docs/docs_zh/sources/source/tts/models.rst +++ b/docs/docs_zh/sources/source/tts/models.rst @@ -6,3 +6,4 @@ tacotron2 waveglow + fastspeech diff --git a/docs/docs_zh/sources/source/tts/tutorial.rst b/docs/docs_zh/sources/source/tts/tutorial.rst index 0ee27cfe1782..8d2ebb68c4dd 100644 --- a/docs/docs_zh/sources/source/tts/tutorial.rst +++ b/docs/docs_zh/sources/source/tts/tutorial.rst @@ -72,7 +72,7 @@ NeMo/examples/tts。假设你当前已经位于 NeMo/examples/tts 目录下, 混合精度训练 ------------ 启用或关闭混合精度训练可以通过一个命令行参数来控制 ``--amp_opt_level`` 。对于 Tacotron 2 -和 Waveglow 来说,该参数建议的默认值为 ``O1`` 。该参数值可以设置为以下几种: +,该参数建议的默认值为 ``O0``,对于 Waveglow,该参数建议的默认值为 ``O1``。该参数值可以设置为以下几种: - O0: 单精度(float32)训练 - O1: 混合精度训练 @@ -96,7 +96,7 @@ torch.distributed.launch 模块并指定 ``--nproc_per_node`` 参数为 GPU 的 合成语音 --------- -你可以使用自己训练的 Tacotron 2 模型合成语音,也可以使用我们预训练好的 Tacotron 2 模型合成语音(`下载链接 `_)。 +你可以使用自己训练的 Tacotron 2 模型合成语音,也可以使用我们预训练好的模型(`Tacotron 2 下载链接 `_),以及(`Waveglow 模型下载链接 `_)。 下一步,请创建你想用于语音合成的文本,并将其转化为训练数据格式相同的 JSON 格式。该 JSON 文件格式如下所示: .. code-block:: json @@ -120,3 +120,7 @@ torch.distributed.launch 模块并指定 ``--nproc_per_node`` 参数为 GPU 的 python tts_infer.py --spec_model=tacotron2 --spec_model_config=configs/tacotron2.yaml --spec_model_load_dir= --vocoder=waveglow --vocoder_model_config=configs/waveglow.yaml --vocoder_model_load_dir= --save_dir= --eval_dataset 要合成普通话语音,记得将 Tacotron 2 模型配置文件更换为 tacotron2_mandarin.yaml。 + +.. tip:: + 你可以通过 ``--waveglow_denoiser_strength`` 和 ``--waveglow_sigma`` 参数来进一步控制 Waveglow 的推理过程。 + 如果合成的音频中含有白噪声,我们推荐从 0 开始逐渐提高 ``--waveglow_denoiser_strength`` 参数的值以缓解这种情况。 diff --git a/docs/docs_zh/sources/source/tutorials/callbacks.rst b/docs/docs_zh/sources/source/tutorials/callbacks.rst index a6b8fdb82d57..b96ea534c3b6 100644 --- a/docs/docs_zh/sources/source/tutorials/callbacks.rst +++ b/docs/docs_zh/sources/source/tutorials/callbacks.rst @@ -19,13 +19,17 @@ CheckpointCallback,和 EvaluatorCallback。 SimpleLossLoggerCallback ------------------------ SimpleLossLoggerCallback 是用来记录训练过程中的一些指标数据比如 loss 以及打印到屏幕 -或者 tensorboard 上两个时间步的间隔。SimpleLossLoggerCallback 有一个必须的参数和两个我们建议 -重写的参数。它接受一个 list 的 NMTensors, 这些NMTensors会在训练过程中作为 print_func(), -get_tb_values() 和 log_to_tb_func() 函数的输入。两个推荐重写的参数是 print_func() 和 +或者 tensorboard 上两个时间步的间隔。 +SimpleLossLoggerCallback 有一个必须的参数和两个我们建议重写的参数。 +它接受一个 list 的 NMTensors, +这些NMTensors会在训练过程中作为 print_func(), +get_tb_values() 和 log_to_tb_func() 函数的输入。 +两个推荐重写的参数是 print_func() 和 get_tb_values() 或者 log_to_tb_func() 任选其一。 print_func() 应该用来记录打印到屏幕上的值。我们推荐使用 logging.info() -来取代 print() 函数。比如,可以这么打印 loss 值: +来取代 print() 函数。 +比如,可以这么打印 loss 值: .. code-block:: python @@ -35,10 +39,13 @@ print_func() 应该用来记录打印到屏幕上的值。我们推荐使用 log 我们提供了两个方法来打印到 tensorboard: get_tb_values() 和 log_to_tb_func()。对于记录标量的简单用例,我们推荐使用 get_tb_values()。 -对于高级用例,像是图片或者音频,我们推荐用 log_to_tb_func() 函数。 +对于高级用例,像是图片或者音频, +我们推荐用 log_to_tb_func() 函数。 -get_tb_values() 用来返回需要打印到 tensorboard 的值。它应该返回一个 list,其中每个元素是一个二元组。 -二元组的第一个元素是一个字符串,表示 tensorbard 标签,第二个元素是要记录的标量值。 +get_tb_values() 用来返回需要打印到 tensorboard 的值。 +它应该返回一个 list,其中每个元素是一个二元组。 +二元组的第一个元素是一个字符串,表示 tensorbard 标签, +第二个元素是要记录的标量值。 注意我们当前只支持标量值。注意如果要用 get_tb_values(),tb_writer 也需要定义。 .. 
code-block:: python @@ -72,15 +79,17 @@ SimpleLossLoggerCallback可以像下面这样创建: get_tb_values=my_get_tb_values, # 我们想要回调这个函数的频次 step_freq=500, - # 我们想要用的 tensorboard writer, 如果 create_tb_writer 在 neural_factory + # 我们想要用的 tensorboard writer, + # 如果 create_tb_writer 在 neural_factory # 中设置为True, 那么它会自动在 neural_factory 创建的时候被创建 tb_writer=neural_factory.tb_writer) ) CheckpointCallback ------------------ -CheckpointCallback 用于在训练过程中对 checkpoint 模型进行的操作,这样他们后面 -就可以重新加载来做推理或者微调。CheckpointCallback 用起来很简单: +CheckpointCallback 用于在训练过程中对 checkpoint 模型进行的操作, +这样他们后面就可以重新加载来做推理或者微调。 +CheckpointCallback 用起来很简单: .. code-block:: python @@ -123,16 +132,40 @@ user_epochs_done_callback 是个接收 global_var_dict 为参数的函数。它 记录要打印到屏幕的相关信息,比如像是验证集上的 loss。 像是把简单的标量值打印到 tensorboard 上,user_epochs_done_callback 应该返回一个字典, -字符串是keys,标量值是 values。这个 tag 到 value 的字典会被解析,每个元素都会被记录到 -tensorboard上 (需要 tensorboard writer 定义好)。 +字符串是keys,标量值是 values。 +这个 tag 到 value 的字典会被解析 +,每个元素都会被记录到tensorboard上 (需要 tensorboard writer 定义好)。 如果想使用更复杂的 tensorboard 打印记录像是图像或者音频, -EvaluatorCallback 必须要在初始化的时候传递给 tb_writer_func 函数。这个函数必须要接收一个 +EvaluatorCallback 必须要在初始化的时候传递给 tb_writer_func 函数。 +这个函数必须要接收一个 `tensorboardX.SummaryWriter `_ 参数,以及 user_epochs_done_callback 需要的参数和当前步。 我们推荐用 user_epochs_done_callback 来简单返回 global_var_dict -从而给到 tb_writer_func 函数来处理。用户必须在 tb_writer_func 中记录所有需要的数据, +在给到 tb_writer_func 函数来处理。用户必须在 tb_writer_func 中记录所有需要的数据, 包括标量。 -例如,可以参考 /examples 下面的例子。 +你也可以在 Weights & Biases 实验追踪器中记录评估指标(evaluation metrics)。 +如果要这么做的话,请设置下面这些参数。另外,确保 wandb 已经安装,你可以运行 ``wandb login``。 + +- wandb_name: W&B 实验名称 +- wandb_project: W&B 项目名称 + +相关的例子,可以参考 /examples 中的脚本。 + +WandbCallback +----------------- +WandbCallback 把损失值(loss)和评估指标记录到 `Weights & Biases `_. +确保 wandb 已经安装,你可以运行 ``wandb login``。 + +这是一个轻量的回调函数,可以把 **训练** 时候的指标记录到 Weights & Biases。 +想要记录评估时候的指标, 参考上面的Evaluator Callback。 + +它需要下面的参数: + +- train_tensors: 一个list的需要评估记录的张量(tensors) +- wandb_name: W&B 实验名字 +- wandb_project: W&B 项目名字 +- args: argparse flags - 需要记录的高参(hyper parameters)。 +- update_freq: 记录更新的频率 \ No newline at end of file diff --git a/docs/docs_zh/sources/source/tutorials/complex_training.rst b/docs/docs_zh/sources/source/tutorials/complex_training.rst index 6ad2cb3523d7..63a2d20270be 100644 --- a/docs/docs_zh/sources/source/tutorials/complex_training.rst +++ b/docs/docs_zh/sources/source/tutorials/complex_training.rst @@ -1,15 +1,17 @@ -复杂训练流程( GAN 例子) -======================================== +复杂训练流程 (GAN 例子) +======================== 目前为止,训练样本在所有可训练的神经模块中用了一个优化器来优化一个损失函数。 -NeMo 进一步扩充了用例,这些用例会用到多个损失函数和多个优化器。 +NeMo 进一步扩充了用例, +这些用例会用到多个损失函数和多个优化器。 .. note:: 我们所有的流程都只支持一个数据层。 多个损失函数 --------------- -以我们之前的 Hello World 为例子。假设我们现在想要优化一个平方误差损失函数和 l1 损失函数。 +以我们之前的 Hello World 为例子。 +假设我们现在想要优化一个平方误差损失函数和 l1 损失函数。 我们可以把这代表着两个损失函数的张量传给 :meth:`NeuralFactory.train()` 。 下面是一个例子: @@ -51,10 +53,12 @@ NeMo 进一步扩充了用例,这些用例会用到多个损失函数和多个 optimization_params={"num_epochs": 3, "lr": 0.0003}, optimizer="sgd") -我们可以进一步拓展这个优化器使得每次优化一个损失函数。比如说,我们不想 -根据 mse_loss + l1_loss 计算梯度,我们想先根据 mse_loss 计算梯度,做一个 -权重更新,然后根据 l1_loss 求导,再做另一个权重更新。那么我们必须要定义我们 -的训练循环: +我们可以进一步拓展这个优化器使得每次优化一个损失函数。 +比如说,我们不想根据 mse_loss + l1_loss 计算梯度, +我们想先根据 mse_loss 计算梯度,做一个权重更新, +然后根据 l1_loss 求导, +再做另一个权重更新。 +那么我们必须要定义我们的训练循环: .. 
code-block:: python @@ -89,7 +93,8 @@ NeMo 进一步扩充了用例,这些用例会用到多个损失函数和多个 # 我们需要创建优化器手动开启复杂的训练流程 optimizer = nf.create_optimizer( optimizer="sgd", - # 注意我们需要指定我们想要优化的神经模块和神经模块张量 + # 注意我们需要指定我们想要优化的神经模块和 + # 神经模块张量 things_to_optimize=[l1_loss_tensor, mse_loss_tensor], optimizer_params={"lr": 0.0003}) @@ -111,9 +116,11 @@ NeMo 进一步扩充了用例,这些用例会用到多个损失函数和多个 多个优化器和多个损失函数 --------------------------------------- -NeMo 也支持用户想要定义多个优化器的用例。一个这样的例子是 GAN,我们想要给生成器 -一个优化器,给判别器一个优化器。我们也想要优化不同的损失函数。 -这个是来自 examples/images/gan.py 下面的支持这种操作的代码: +NeMo 也支持用户想要定义多个优化器的用例。 +一个这样的例子是 GAN,我们想要给生成器一个优化器,给判别器一个优化器。 +我们也想要优化不同的损失函数。 +下面的代码来自 examples/images/gan.py, +它支持了这种做法: .. code-block:: python @@ -139,7 +146,8 @@ NeMo 也支持用户想要定义多个优化器的用例。一个这样的例子 ... # 创建优化器 - # 注意我们对于生成器和判别器分别想要一个优化器 + # 注意我们对于生成器和判别器分别只想要一个优化器 + # 进行优化 optimizer_G = neural_factory.create_optimizer( things_to_optimize=[generator], ...) @@ -148,7 +156,8 @@ NeMo 也支持用户想要定义多个优化器的用例。一个这样的例子 ...) # 定义 training_loop - # 注意在我们的训练循环中,我们想要优化三次判别器再优化一次生成器 + # 注意在我们的训练循环中, + # 我们想要优化三次判别器再优化一次生成器 losses_G = [generator_loss] losses_D = [interpolated_loss, real_loss, grad_penalty] training_loop = [ diff --git a/docs/docs_zh/sources/source/tutorials/custommodules.rst b/docs/docs_zh/sources/source/tutorials/custommodules.rst index 31b39181f8f0..abb114a76fab 100644 --- a/docs/docs_zh/sources/source/tutorials/custommodules.rst +++ b/docs/docs_zh/sources/source/tutorials/custommodules.rst @@ -23,13 +23,13 @@ 继承类关系图。假设 API 的类是绿色的。红色类是用户将要执行的。 可训练模块 ------------------ +------------ .. note:: 注意 :class:`TrainableNM` 类 有两个基础类::class:`NeuralModule` 类 和 ``torch.nn.Module``. 从头定义模块 -~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~ (1) 首先继承 :class:`TrainableNM` 类。 (2) 实现 ``input_ports`` 和 ``output_ports`` 属性,定义输入输出端口。 @@ -97,6 +97,7 @@ return self.fc1(nx) + 转换 PyTorch 的 nn.Module ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -124,9 +125,10 @@ ---------- (1) 继承自 :class:`DataLayerNM` 类。 (2) 实现 ``__len__`` 方法,返回数据集大小 -(3) 实现 ``dataset`` 或者 ``data_iterator`` 属性,返回一个PyTorch数据集对象或者你的数据集的迭代器。(没有使用的属性应该返回None) +(3) 实现 ``dataset`` 或者 ``data_iterator`` 属性,返回一个 PyTorch 数据集对象或者你的数据集的迭代器。(没有使用的属性应该返回None) -当实现构造函数的时候,你首先要调用基类构造函数,并且定义在output_ports定义 *仅输出端口* 。 +当实现构造函数的时候,你首先要调用基类构造函数, +并且定义在 output_ports 定义 *仅输出端口* 。 另外,模块应该接收像是 ``batch_size`` 和 ``shuffle`` 的参数。 如果你使用了 ``torch.utils.data.Dataset`` 类 (*推荐方法*),那么你可以实现 ``dataset`` 属性,一个数据加载器就会自动给你创建。 @@ -137,6 +139,7 @@ 这个例子把 PyTorch 的 *ImageFolder* 数据集封装成一个神经模块的数据层。 + .. code-block:: python import torch @@ -146,9 +149,11 @@ """这个类把 Pytorch 的 ImageFolder 数据集的 API 封装成了神经模块""" class ImageFolderDataLayer(DataLayerNM): + @property def output_ports(self): - # 注意,我们会定义输出的高和宽 + """返回模块输出端口的定义""" + # 注意,我们会定义输出的高和宽的尺寸张量 # 因此需要一个size参数 return { "image": NeuralType( @@ -196,12 +201,14 @@ (3) 在构造函数里调用基类构造函数 (4) 实现 :meth:`_loss_function` 方法。 + Example ~~~~~~~ .. 
code-block:: python class CrossEntropyLoss(LossNM): + @property def input_ports(self): return {"predictions": NeuralType({ diff --git a/docs/docs_zh/sources/source/tutorials/intro.rst b/docs/docs_zh/sources/source/tutorials/intro.rst index 469c9caa2029..ea3504429c21 100644 --- a/docs/docs_zh/sources/source/tutorials/intro.rst +++ b/docs/docs_zh/sources/source/tutorials/intro.rst @@ -8,6 +8,7 @@ examples neuraltypes custommodules + module_configuration weightsharing callbacks complex_training diff --git a/docs/docs_zh/sources/source/tutorials/module_configuration.rst b/docs/docs_zh/sources/source/tutorials/module_configuration.rst new file mode 100644 index 000000000000..fcfeaddd3183 --- /dev/null +++ b/docs/docs_zh/sources/source/tutorials/module_configuration.rst @@ -0,0 +1,59 @@ +模块配置 +========= + +神经模块的配置可以从 YAML 文件导入,也可以导出到 YAML 文件 \ +一个模块配置文件存储了创建一个实例所需要的所有参数。 + +.. note:: + 对于可训练的神经模块,`配置` 相对于 checkpoint 是起到了互补的作用。 \ + 配置文件包含了参数 (比如: 层的数量, 隐藏层的大小等), \ + 而 checkpoint 包含了实际模块的权重 + + +导出配置文件 +--------------- + +在下面的例子中,我们再次训练一个模型来学习 y=sin(x) 的泰勒系数 \ +但是,我们进一步的扩展了这个例子,展示了如果导出模块的配置,并写入到一个 YAML 文件中 \ +再用相同的参数创建第二个实例 + +我们首先创建 :class:`NeuralFactory` 对象,从原始例子中初始化这个模块: + +.. literalinclude:: ../../../../../examples/start_here/module_configuration.py + :language: python + :lines: 24-34 + +现在我们可以导出任何一个已有模块的配置,调用 :meth:`export_to_config()`, 例如 \ +我们可以导出 :class:`TaylorNet` 的配置,通过调用: + +.. literalinclude:: ../../../../../examples/start_here/module_configuration.py + :language: python + :lines: 37 + +导入配置 +--------- + +有个类似的函数 :meth:`import_from_config()` 负责加载配置文件: + +.. literalinclude:: ../../../../../examples/start_here/module_configuration.py + :language: python + :lines: 40 + +.. note:: + :meth:`import_from_config()` 函数事实上是创建了在配置中的这个类的一个新的实例 \ + 需要注意的是这两个类不贡献任何可训练的权重。 \ + NeMo 给权重连接(weight tying)提供了另一套机制。 + +现在我们可以像用其它模块那样用导入的模块 \ +例如,我们可以构建一个图,用 NeMo trainer 来训练: + +.. literalinclude:: ../../../../../examples/start_here/module_configuration.py + :language: python + :lines: 42- + + +.. include:: module_custom_configuration.rst + + +.. note:: + 上面(以及其它许多的)的例子可以在 `nemo/examples` 文件夹下找到 diff --git a/docs/docs_zh/sources/source/tutorials/module_custom_configuration.rst b/docs/docs_zh/sources/source/tutorials/module_custom_configuration.rst new file mode 100644 index 000000000000..d9834a1d2dd4 --- /dev/null +++ b/docs/docs_zh/sources/source/tutorials/module_custom_configuration.rst @@ -0,0 +1,72 @@ +自定义配置 +----------- + + +一个一般的配置导出应该可以让我们用基于原始数据类型(string, int, float)的参数 \ +或者用 list/dict 嵌套的原始数据类型。 + +如果想拓展这个功能,用其它的自定义类型,用户需要为自己的模块类,重载 \ +方法 :meth:`export_to_config()` 和 :meth:`import_from_config()` \ +下面这个教程解释了我们该怎么做。 + + +下面的例子中,我们从 :class:`TaylorNet` (在前面的例子中有使用过这个类) 中得到我们的类 \ +然后用这些方法来拓展它。首先我们定义一个简单的类 :class:`Status` enum: + +.. literalinclude:: ../../../../../examples/start_here/module_custom_configuration.py + :language: python + :lines: 28-30 + +现在让我们定义 :class:`CustomTaylorNet` 神经模块类: + +.. literalinclude:: ../../../../../examples/start_here/module_custom_configuration.py + :language: python + :lines: 33-38 + + +为了能处理好 :class:`Status` enum 的导出功能,我们必须实现自定义函数 \ +:meth:`export_to_config()`: + +.. literalinclude:: ../../../../../examples/start_here/module_custom_configuration.py + :language: python + :lines: 40-61 + + +注意配置实际上是一个字典,包含了两个部分: + + * ``header`` (存储类的说明, NeMo 版本, NeMo 集合名称等) 以及 + * ``init_params`` 存储初始化对象所需要的参数 + +这些参数存在保护域 ``self._init_params`` 中,它的基类是 :class:`NeuralModule` 类。 +确保用户不能直接访问和使用它们。 + +类似地,我们必须重载方法 :meth:`_deserialize_configuration()` : + +.. 
literalinclude:: ../../../../../examples/start_here/module_custom_configuration.py + :language: python + :lines: 63-86 + +.. note:: + 再强调一下 :meth:`_deserialize_configuration()` 是类的方法,实际上返回 \ + 一个新的对象实例 - 在这个例子中就是 :class:`CustomTaylorNet` 类型。 + + +现在我们可以简单的构建一个实例,并且导出它的配置,通过调用: + +.. literalinclude:: ../../../../../examples/start_here/module_custom_configuration.py + :language: python + :lines: 95-96,101-102 + +通过加载这个配置,初始化第二个实例: + +.. literalinclude:: ../../../../../examples/start_here/module_custom_configuration.py + :language: python + :lines: 104-106 + +从结果中我们可以看到新的对象把状态都设置成了原来那个对象的值: + +.. code-block:: bash + + [NeMo I 2020-02-18 20:15:50 module_custom_configuration:74] Configuration of module 3ec99d30-baba-4e4c-a62b-e91268762864 (CustomTaylorNet) exported to /tmp/custom_taylor_net.yml + [NeMo I 2020-02-18 20:15:50 module_custom_configuration:41] Status: Status.error + [NeMo I 2020-02-18 20:15:50 module_custom_configuration:114] Instantiated a new Neural Module of type `CustomTaylorNet` using configuration loaded from the `/tmp/custom_taylor_net.yml` file diff --git a/docs/docs_zh/sources/source/tutorials/neuraltypes.rst b/docs/docs_zh/sources/source/tutorials/neuraltypes.rst index 853b181ed558..124a80068525 100644 --- a/docs/docs_zh/sources/source/tutorials/neuraltypes.rst +++ b/docs/docs_zh/sources/source/tutorials/neuraltypes.rst @@ -1,80 +1,182 @@ 神经类型 ============ -神经类型是用来检查输入张量,确保两个神经模块是兼容的,并且捕捉语义和维度上的错误。 +基础 +~~~~~~ -神经类型在 :class:`NeuralType` 类中实现,它把张量的轴映射到 :class:`AxisType`。 +每个神经模块的输入和输出端口都是有类型的。 +类型系统的目标是要检查相连输入/输出端口对之间的兼容性。 +当用户连接各个模块以及在训练和推理开始之前,类型系统的约束限制都会被检查。 -:class:`AxisType` 每个轴包含下列信息: +神经类型 (Neural Types) 在 Python 类 :class:`NeuralType` 中实现,帮助类 +由 :class:`ElementType`, :class:`AxisType` 和 :class:`AxisKindAbstract` 这几个类得到。 -* 语义标签(Semantic Tag), 必须继承 :class:`BaseTag`类,比如 :class:`BatchTag`, :class:`ChannelTag`, :class:`TimeTag` 等。这些标签是 `is-a` 的继承关系。 -* 维度(Dimension): 无符号整形 -* 描述符(Descriptor): 字符串 +**一个神经类型包含两类信息** +* **axes** - 表示特定轴的含义 (e.g. batch, time, 等) +* **elements_type** - 表示存在里面的激活元的语义和属性 (audio signal,text embedding, logits, 等) -初始化神经类型,你应该给它传递一个字典(轴到类型),把轴映射到它的AxisType。 -比如,ResNet18 的输入和输出端口可以这么描述: + +如果想初始化一个NeuralType, 你需要传递给它下面的参数: `axes: Optional[Tuple] = None, +elements_type: ElementType = VoidType(), optional=False`. 通常,初始化 +:class:`NeuralType` 对象的地方是在模块里面的 `input_ports` 和 +`output_ports` 属性中。 + + +考虑下面的这个例子。它表示了一个在语音识别集合中用到的(音频) 数据层的输出端口。 .. code-block:: python - input_ports = {"x": NeuralType({0: AxisType(BatchTag), - 1: AxisType(ChannelTag), - 2: AxisType(HeightTag, 224), - 3: AxisType(WidthTag, 224)})} - output_ports = {"output": NeuralType({ - 0: AxisType(BatchTag), - 1: AxisType(ChannelTag)})} + { + 'audio_signal': NeuralType(axes=(AxisType(kind=AxisKind.Batch, size=None, is_list=False), + AxisType(kind=AxisKind.Time, size=None, is_list=False)), + elements_type=AudioSignal(freq=self._sample_rate)), + 'a_sig_length': NeuralType(axes=tuple(AxisType(kind=AxisKind.Batch, size=None, is_list=False)), + elements_type=LengthsType()), + 'transcripts': NeuralType(axes=(AxisType(kind=AxisKind.Batch, size=None, is_list=False), + AxisType(kind=AxisKind.Time, size=None, is_list=False)), + elements_type=LabelsType()), + 'transcript_length': NeuralType(axes=tuple(AxisType(kind=AxisKind.Batch, size=None, is_list=False)), + elements_type=LengthsType()), + } + +一个具有一样输出端口的更加精简的版本: +.. 
code-block:: python
+
+    {
+        'audio_signal': NeuralType(('B', 'T'), AudioSignal(freq=self._sample_rate)),
+        'a_sig_length': NeuralType(tuple('B'), LengthsType()),
+        'transcripts': NeuralType(('B', 'T'), LabelsType()),
+        'transcript_length': NeuralType(tuple('B'), LengthsType()),
+    }
 
-**神经类型比较**
-两个 :class:`NeuralType` 对象可以通过 ``.compare`` 方法来进行比较。
-结果是:
+
+神经类型比较
+~~~~~~~~~~~~~~~~~~~~~~
+
+两个 :class:`NeuralType` 对象可以用 ``.compare`` 方法来进行比较。
+比较的结果来自 :class:`NeuralTypeComparisonResult`:
 
 .. code-block:: python
 
     class NeuralTypeComparisonResult(Enum):
-        """比较两个神经类型兼容性的结果
-        A.compare_to(B):"""
-        SAME = 0
-        LESS = 1  # A 是 B
-        GREATER = 2  # B 是 A
-        DIM_INCOMPATIBLE = 3  # 重新调整连接器也许可以修复不兼容
-        TRANSPOSE_SAME = 4  # 把 A 转置可以使它们相同
-        INCOMPATIBLE = 5  # A 和 B 不兼容。不能自动修复不兼容
+        """比较两个神经类型对象兼容性的比较结果,
+        使用 A.compare_to(B):"""
+
+        SAME = 0
+        LESS = 1  # A 是 B
+        GREATER = 2  # B 是 A
+        DIM_INCOMPATIBLE = 3  # 调整连接的大小 (resize) 也许可以修复不兼容性
+        TRANSPOSE_SAME = 4  # 转置以及/或者在 lists 和 tensors 之间的转换可以让它们一致
+        CONTAINER_SIZE_MISMATCH = 5  # A 和 B 包含不同数量的元素
+        INCOMPATIBLE = 6  # A 和 B 不兼容
+        SAME_TYPE_INCOMPATIBLE_PARAMS = 7  # A 和 B 相同类型但参数化不同
+
+
+特殊例子
+~~~~~~~~~~~~~
+
+* **Void** 元素类型。有时候,有个像 C/C++ 中 "void*" 的功能还挺有必要的。也就是说,我们想强制顺序(order)和轴的语义但是又要求能接受任何类型的元素。我们可以用 :class:`VoidType` 实例作为 ``elements_type`` 。
+* **Big void** 这种类型会禁用掉所有的类型检查。可以这样创建这个类型: ``NeuralType()``。它和其它类型的比较结果永远都是 SAME。
+* **AxisKind.Any** 这个轴类型(kind)用来表示任意的轴类型。这个很管用,比如,在损失函数中,一个特定的损失函数模块可以用在不同的应用中,表示不同的轴类型。
+
+继承
+~~~~~~~~~~~
+
+类型继承在编程中是非常强大的工具。 NeMo 的神经类型支持继承。考虑
+下面这个例子。
+
+**例子.** 我们想要表示: 模块 A 的输出 (out1) 产生梅尔谱(mel-spectrogram)
+信号, 而模块 B 的输出产生 mfcc 频谱。我们也想要一个模块 C 可以对任意频谱做数据增强。
+用 NeMo 的神经类型表示这种语义就很容易:
+
+.. code-block:: python
+
+    input = NeuralType(('B', 'D', 'T'), SpectrogramType())
+    out1 = NeuralType(('B', 'D', 'T'), MelSpectrogramType())
+    out2 = NeuralType(('B', 'D', 'T'), MFCCSpectrogramType())
+
+    # 会生成下面的结果
+    input.compare(out1) == SAME
+    input.compare(out2) == SAME
+    out1.compare(input) == INCOMPATIBLE
+    out2.compare(out1) == INCOMPATIBLE
+
+之所以会这样是因为 ``MelSpectrogramType`` 和 ``MFCCSpectrogramType`` 都继承自 ``SpectrogramType`` 类。
+注意, mfcc 和 mel 频谱是不能互换的,这就是为什么 ``out2.compare(out1) == INCOMPATIBLE``。
 
-**特殊例子**
+高级用法
+~~~~~~~~~~~~~~
 
-* *Non-tensor* 对象应该用 ``NeuralType(None)`` 表示。
-* *Optional*: 输入是可选的,如果提供了类型输入,那么会自动做类型检测
-* *Root* 类型可以用 ``NeuralType({})`` 表示: ``NeuralType({})`` 类型的端口必须可以接收任意的神经类型的神经模块张量(NmTensors):
+**使用用户定义的类型** 如果你想添加自己的元素类型, 创建一个新的继承自
+:class:`ElementType` 的类。除了使用内置的轴类型
+:class:`AxisKind`, 你也可以定义自己的轴类型:
+创建一个新的 Python enum, 继承 :class:`AxisKindAbstract`。
+
+**列表(Lists)**. 有时候模块的输入和输出应该是一个 List 的(也有可能是嵌套的)张量。 NeMo 的
+:class:`AxisType` 类接受 ``is_list`` 参数,它可以设置为 True。
+考虑下面的例子:
 
 .. code-block:: python
 
-    root_type = NeuralType({})
-    root_type.compare(any_other_neural_type) == NeuralTypeComparisonResult.SAME
+    T1 = NeuralType(
+        axes=(
+            AxisType(kind=AxisKind.Batch, size=None, is_list=True),
+            AxisType(kind=AxisKind.Time, size=None, is_list=True),
+            AxisType(kind=AxisKind.Dimension, size=32, is_list=False),
+            AxisType(kind=AxisKind.Dimension, size=128, is_list=False),
+            AxisType(kind=AxisKind.Dimension, size=256, is_list=False),
+        ),
+        elements_type=ChannelType(),
+    )
+
+这个例子中,前两个轴是 list。这个对象是 list 的 list,其中的元素是秩为 3 的张量,维度为 (32x128x256)。
+注意 list 的轴必须在其它张量轴的前面。
 
-参考 "nemo/tests/test_neural_types.py" 中更多的例子。
+.. tip::
+    我们强烈建议避免这么做。可能的话还是用张量带 padding 的方式来做。
 
-**神经类型帮助我们调试程序**
+**命名元组(Named tuples) (数据结构).** 为了能够表示结构化的对象, 例如:在计算机视觉中的边界框(bounding box),
+可以用下面的语句:
 
-有许多的错误类型在运行和编译的时候不会报错,比如:
+.. 
code-block:: python + + class BoundingBox(ElementType): + def __str__(self): + return "bounding box from detection model" + def fields(self): + return ("X", "Y", "W", "H") + # 加新的用户定义的轴类型 + class AxisKind2(AxisKindAbstract): + Image = 0 + T1 = NeuralType(elements_type=BoundingBox(), + axes=(AxisType(kind=AxisKind.Batch, size=None, is_list=True), + AxisType(kind=AxisKind2.Image, size=None, is_list=True))) + +在上面的例子中, 我们给边界框构建了一个特别的 "element type" 类,包含了4个值。 +我们也加了自己的轴类型(Image). 所以最后的神经类型(T1) 表示的是 lists(batch) 的 lists ( +image) 的边界框。就是说,它是 list(lists(4x1 张量))。 + + +**神经类型帮助我们调试模型** + +有一个很大的错误类, 在运行和编译的时候不会报错。比如: (1) "Rank matches but semantics doesn't". -例如,模块 A 的数据格式是 [Batch, Time, Dim],但是模块B期望的格式是 [Time, Batch, Dim]。简单的轴转置就可以解决这个错误。 +比如,模块 A 产生的数据格式是 [Batch, Time, Dim] 而模块 B 期望的格式是 [Time, Batch, Dim]。简单的轴转置就可以解决这个错。 (2) "Concatenating wrong dimensions". -例如, 模块应该根据 0 号维度合并(加)两个输入张量 X 和 Y。但是张量 X 格式是 [B, T, D],但是张量 Y 格式是 [T, B, D] 然后做合并。 +例如, 模块应该沿着维度 0 合并(相加)两个输入张量 X 和 Y。但是张量 X 格式为 [B, T, D] 而张量 Y=[T, B, D] 然后合并. . (3) "Dimensionality mismatch" -一个模块期望图片尺寸是 224x224 但是得到的是 256x256。这种类型比较会导致 ``NeuralTypeComparisonResult.DIM_INCOMPATIBLE``。 +一个模块想要一张大小为 224x224 的图片,但得到的是 256x256。类型比较的结果是 ``NeuralTypeComparisonResult.DIM_INCOMPATIBLE`` 。 + -.. note:: - 这个类型机制是由 Python 继承表示的。也就是说 :class:`NmTensor` 类继承自 :class:`NeuralType` 类。 diff --git a/docs/docs_zh/sources/source/tutorials/weightsharing.rst b/docs/docs_zh/sources/source/tutorials/weightsharing.rst index 6844933612cc..ade72d8f7cca 100644 --- a/docs/docs_zh/sources/source/tutorials/weightsharing.rst +++ b/docs/docs_zh/sources/source/tutorials/weightsharing.rst @@ -15,7 +15,7 @@ train_dataloader = nemo.TrainDataLayer(**train_config) eval_dataloader = nemo.EvalDataLayer(**eval_config) - L = nemo.MaskedXEntropyLoss() + L = nemo.tutorials.MaskedXEntropyLoss() # 训练模型 @@ -41,13 +41,14 @@ :class:`NeuralModule` 类提供了两个方法 :meth:`get_weights` 和 :meth:`set_weights` -用来做权重共享 +用来做权重复制 .. note:: :meth:`set_weights` 方法只能设置模块的部分权重 .. important:: 这个方法只能用来复制权重。后续在一个模块中更新权重不会影响到其他模块中的权重。 + 这就意味着在更新步中,权重会得到不同的梯度。 考虑下面这个例子: @@ -77,15 +78,16 @@ :class:`NeuralModule` 类提供 :meth:`tie_weights_with` 方法在多个模块间连接权重 .. important:: - 连接后的权重在所有的模块之间保持一致,后续对一个模块中权重的改变也会使得其他模块中的权重有相同的改变 + 连接后的权重在所有的模块之间保持一致,对于权重的梯度都是相同的。 +.. important:: + 然而手动通过 tensor.data 在一个模块上更新权重是不会更新 w 的。 + 在下面的例子中,我们首先创建一个简单的词嵌入编码器,它的输入是 [batch, time] 的词序列,从词表中 ``V`` 中找到词id,把它映射到 ``D`` 维空间。 -这是一个查表的映射,从 ``V`` 维空间到 ``D`` 维空间。 -接着我们需要创建一个解码器,从 ``D`` 维空间映射到 ``V`` 维空间。我们想把编码器的映射矩阵在解码器中重用。 -下面的代码解释了这要怎么做。 +这是一个查表的映射,从 ``V`` 维空间到 ``D`` 维空间。接着我们需要创建一个解码器,从 ``D`` 维空间映射到 ``V`` 维空间。我们想把编码器的映射矩阵在解码器中重用。下面的代码解释了这要怎么做。 .. note:: - 权重有不同名字(``embedding.weight`` 和 ``projection.weight``) 但值是一样的。对一个权重的改变会导致另一个也变化。可以理解为 ``embedding.weight`` 和 ``projection.weight`` 是指向同一个张量的指针。 + 权重有不同名字 (``embedding.weight`` 和 ``projection.weight``) 但值和梯度更新是一样的。 .. code-block:: python @@ -103,14 +105,8 @@ self.assertTrue(np.array_equal(embd.embedding.weight.detach().numpy(), proj.projection.weight.detach().numpy())) - was = embd.embedding.weight.detach().numpy() - - # 现在,我们在一个对象上改变值 - embd.embedding.weight.data = torch.tensor(np.random.randint(0, 10, (3, 2))*1.0) - after = embd.embedding.weight.detach().numpy() +.. 
warning:: + 手动设置权重张量等于其它的张量有可能会打断多 GPU 和多节点(multi-node)运行,比如: + ``embd.embedding.weight = proj.projection.weights`` 不太推荐,推荐使用 ``tie_weights_with()`` 函数 - # 确保另一个对象上的值也得到了相应的变化 - self.assertTrue(np.array_equal(embd.embedding.weight.detach().numpy(), - proj.projection.weight.detach().numpy())) - self.assertFalse(np.array_equal(was, after)) diff --git a/docs/sources/source/api-docs/nemo.rst b/docs/sources/source/api-docs/nemo.rst index 8ff87c9b7527..f5893ec7ea87 100644 --- a/docs/sources/source/api-docs/nemo.rst +++ b/docs/sources/source/api-docs/nemo.rst @@ -18,6 +18,14 @@ neural_modules :undoc-members: :show-inheritance: +neural_graph +-------------- + +.. automodule:: nemo.core.neural_graph + :members: + :undoc-members: + :show-inheritance: + neural_factory -------------- diff --git a/docs/sources/source/asr/8kHz_models.rst b/docs/sources/source/asr/8kHz_models.rst new file mode 100644 index 000000000000..b075418d2314 --- /dev/null +++ b/docs/sources/source/asr/8kHz_models.rst @@ -0,0 +1,11 @@ +8kHz Models +=========== + +For applications based on 8 kHz speech, we recommend using the Quartznet model available `here `__. +This model for 16 kHz speech was trained on multiple datasets including about 1700 hours of upsampled narrowband conversational telephone speech from +Fisher and Switchboard datasets. In our experiments, we found that this model's accuracy on 8 kHz +speech is at par with a model trained exclusively on narrowband speech. + +For best accuracy in your application, you may choose to :ref:`fine-tune ` this model using data collected from your application. + + diff --git a/docs/sources/source/asr/asr_all.bib b/docs/sources/source/asr/asr_all.bib index 4041be618e51..5eb0704b073f 100644 --- a/docs/sources/source/asr/asr_all.bib +++ b/docs/sources/source/asr/asr_all.bib @@ -60,8 +60,6 @@ @misc{ardila2019common primaryClass={cs.CL} } - - @article{graves2012, title={Sequence Transduction with Recurrent Neural Networks}, author={Graves, Alex}, @@ -925,3 +923,16 @@ @article{novograd2019 eid = {arXiv:1905.11286}, eprint = {1905.11286}, } + +@article{kriman2019quartznet, + title={Quartznet: {Deep} automatic speech recognition with 1d time-channel separable convolutions}, + author={Kriman, Samuel and Beliaev, Stanislav and Ginsburg, Boris and Huang, Jocelyn and Kuchaiev, Oleksii and Lavrukhin, Vitaly and Leary, Ryan and Li, Jason and Zhang, Yang}, + journal={arXiv preprint arXiv:1910.10261}, + year={2019} +} + +@misc{itu1988g711, + title={{ITU-T} {G.711} - {Pulse} code modulation ({PCM}) of voice frequencies}, + author={ITU-T Geneva Switzerland}, + year={1988}, +} diff --git a/docs/sources/source/asr/datasets.rst b/docs/sources/source/asr/datasets.rst index 6e6467ee5045..349e584af6ec 100644 --- a/docs/sources/source/asr/datasets.rst +++ b/docs/sources/source/asr/datasets.rst @@ -7,7 +7,7 @@ LibriSpeech ----------- Run these scripts to download LibriSpeech data and convert it into format expected by `nemo_asr`. -You should have at least 110GB free space. +You should have at least 250GB free space. .. code-block:: bash @@ -127,7 +127,7 @@ Running the following script will convert the .sph files to .wav using sox, and Once this script finishes, you should have a `train_manifest.json` and `test_manifest.json` in the `/an4/` directory. Aishell1 ------------------------------------ +-------- Run these scripts to download Aishell1 data and convert it into format expected by `nemo_asr`. 
@@ -141,7 +141,7 @@ Run these scripts to download Aishell1 data and convert it into format expected After this, your `data` folder should contain a `data_aishell` folder which contains wav, transcript folder and related `.json` files and `vocab.txt`. Aishell2 ------------------------------------ +-------- Run the script to process AIShell-2 dataset in order to generate files in the supported format of `nemo_asr`. You should set the data folder of AIShell-2 using `--audio_folder` and where to push these files using `--dest_folder`. diff --git a/docs/sources/source/asr/installation.rst b/docs/sources/source/asr/installation.rst new file mode 100644 index 000000000000..63de9ae2b4f7 --- /dev/null +++ b/docs/sources/source/asr/installation.rst @@ -0,0 +1,30 @@ +Installation +============ + +Neural Modules and their corresponding collections have certain requirements that can be optionally installed to +improve performance of operations. + +Torch Audio +----------- + +The `torchaudio` library is used for certain audio pre-processing Neural Modules. Primarily, + + - AudioToMFCCPreprocessor + - TimeStretchAugmentation + +Official installation directions are provided at the `torchaudio github page `_. It is recommended to follow +the conda installation procedure and install the latest version of the library available on conda. + +Numba +----- + +The `numba` library is used for optimized execution of certain data augmentation procedures that can be used during +data pre-processing. It can substantially reduce execution time during training, and is a recommended installation for +Neural Modules. + +Official installation directions are provided at the `numba github page `_. It is recommended to follow +the conda installation procedure and install the latest version of the library available on conda. + +.. code-block:: bash + + conda install numba diff --git a/docs/sources/source/asr/intro.rst b/docs/sources/source/asr/intro.rst index f50aee692821..cfa5d1e16919 100644 --- a/docs/sources/source/asr/intro.rst +++ b/docs/sources/source/asr/intro.rst @@ -6,9 +6,12 @@ Speech Recognition .. toctree:: :maxdepth: 8 + installation tutorial datasets models + 8kHz_models + diff --git a/docs/sources/source/asr/jasper.png b/docs/sources/source/asr/jasper.png deleted file mode 100644 index fe247f6ed00f..000000000000 Binary files a/docs/sources/source/asr/jasper.png and /dev/null differ diff --git a/docs/sources/source/asr/jasper.rst b/docs/sources/source/asr/jasper.rst index dc136cbb5f19..ec98d88ae0f1 100644 --- a/docs/sources/source/asr/jasper.rst +++ b/docs/sources/source/asr/jasper.rst @@ -23,3 +23,10 @@ Jasper10x5dr | Librispeech, `here `__ ============= ======================= ================================================================================= + +References +^^^^^^^^^^ +.. bibliography:: asr_all.bib + :style: plain + :labelprefix: ASR-MODELS + :keyprefix: asr-models- \ No newline at end of file diff --git a/docs/sources/source/asr/models.rst b/docs/sources/source/asr/models.rst index 57f529bc5298..66b5249af508 100644 --- a/docs/sources/source/asr/models.rst +++ b/docs/sources/source/asr/models.rst @@ -7,10 +7,3 @@ Models jasper quartznet -References -------------- - -.. 
bibliography:: asr_all.bib - :style: plain - :labelprefix: ASR-MODELS - :keyprefix: asr-models- \ No newline at end of file diff --git a/docs/sources/source/asr/quartz_vertical.png b/docs/sources/source/asr/quartz_vertical.png index 39ef7534c783..4cbede907736 100644 Binary files a/docs/sources/source/asr/quartz_vertical.png and b/docs/sources/source/asr/quartz_vertical.png differ diff --git a/docs/sources/source/asr/quartznet.rst b/docs/sources/source/asr/quartznet.rst index 6dbadab71907..70af4758c3a6 100644 --- a/docs/sources/source/asr/quartznet.rst +++ b/docs/sources/source/asr/quartznet.rst @@ -1,7 +1,9 @@ +.. _Quartznet_model: + QuartzNet --------- -QuartzNet is a version of Jasper :cite:`asr-models-li2019jasper` model with separable convolutions and larger filters. It can achieve performance +QuartzNet :cite:`qtz-models-kriman2019quartznet` is a version of Jasper :cite:`qtz-models-li2019jasper` model with separable convolutions and larger filters. It can achieve performance similar to Jasper but with an order of magnitude less parameters. Similarly to Jasper, QuartzNet family of models are denoted as QuartzNet_[BxR] where B is the number of blocks, and R - the number of convolutional sub-blocks within a block. Each sub-block contains a 1-D *separable* convolution, batch normalization, ReLU, and dropout: @@ -9,22 +11,27 @@ Similarly to Jasper, QuartzNet family of models are denoted as QuartzNet_[BxR] w :align: center :alt: quartznet model - .. note:: This checkpoint was trained on LibriSpeech :cite:`panayotov2015librispeech` and full "validated" part of En Mozilla Common Voice :cite:`ardila2019common` - `QuartzNet paper `_. -Pretrained models can be found, `here `_. +Pretrained models can be found at the following links: -============= ===================== ============================================================================== +============= ===================== ================================================================================== Network Dataset Download Link -============= ===================== ============================================================================== +============= ===================== ================================================================================== QuartzNet15x5 Librispeech, `here `__ Mozilla Common Voice QuartzNet15x5 Aishell2 `here `__ -============= ===================== ============================================================================== +QuartzNet15x5 Librispeech, `here `__ + Common Voice, + Fisher, WSJ, + Switchboard +============= ===================== ================================================================================== References ----------- +^^^^^^^^^^ .. 
bibliography:: asr_all.bib :style: plain + :labelprefix: QTZ-MODELS + :keyprefix: qtz-models- + diff --git a/docs/sources/source/asr/tutorial.rst b/docs/sources/source/asr/tutorial.rst index 7067f834f800..71102c47361f 100644 --- a/docs/sources/source/asr/tutorial.rst +++ b/docs/sources/source/asr/tutorial.rst @@ -13,17 +13,20 @@ A more introductory, Jupyter notebook ASR tutorial can be found `on GitHub /examples/asr/configs/jasper12x1SEP.yaml") as f: - jasper_model_definition = yaml.load(f) - labels = jasper_model_definition['labels'] + with open("/examples/asr/configs/quartznet12x1.yaml") as f: + quartznet_model_definition = yaml.load(f) + labels = quartznet_model_definition['labels'] # Instantiate neural modules data_layer = nemo_asr.AudioToTextDataLayer( @@ -122,10 +130,10 @@ The script below does both training (on `train_clean_100.json`) and evaluation ( data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor() spec_augment = nemo_asr.SpectrogramAugmentation(rect_masks=5) - jasper_encoder = nemo_asr.JasperEncoder( + encoder = nemo_asr.JasperEncoder( feat_in=64, - **jasper_model_definition['JasperEncoder']) - jasper_decoder = nemo_asr.JasperDecoderForCTC( + **quartznet_model_definition['JasperEncoder']) + decoder = nemo_asr.JasperDecoderForCTC( feat_in=1024, num_classes=len(labels)) ctc_loss = nemo_asr.CTCLossNM(num_classes=len(labels)) greedy_decoder = nemo_asr.GreedyCTCDecoder() @@ -135,9 +143,9 @@ The script below does both training (on `train_clean_100.json`) and evaluation ( processed_signal, processed_signal_len = data_preprocessor( input_signal=audio_signal, length=audio_signal_len) aug_signal = spec_augment(input_spec=processed_signal) - encoded, encoded_len = jasper_encoder( + encoded, encoded_len = encoder( audio_signal=aug_signal, length=processed_signal_len) - log_probs = jasper_decoder(encoder_output=encoded) + log_probs = decoder(encoder_output=encoded) predictions = greedy_decoder(log_probs=log_probs) loss = ctc_loss( log_probs=log_probs, targets=transcript, @@ -150,9 +158,9 @@ The script below does both training (on `train_clean_100.json`) and evaluation ( processed_signal_v, processed_signal_len_v = data_preprocessor( input_signal=audio_signal_v, length=audio_signal_len_v) # Note that we are not using data-augmentation in validation DAG - encoded_v, encoded_len_v = jasper_encoder( + encoded_v, encoded_len_v = encoder( audio_signal=processed_signal_v, length=processed_signal_len_v) - log_probs_v = jasper_decoder(encoder_output=encoded_v) + log_probs_v = decoder(encoder_output=encoded_v) predictions_v = greedy_decoder(log_probs=log_probs_v) loss_v = ctc_loss( log_probs=log_probs_v, targets=transcript_v, @@ -243,7 +251,6 @@ To train with mixed-precision all you need is to set `optimization_level` parame backend=nemo.core.Backend.PyTorch, local_rank=args.local_rank, optimization_level=nemo.core.Optimization.mxprO1, - placement=nemo.core.DeviceType.AllGpu, cudnn_benchmark=True) .. note:: @@ -254,45 +261,48 @@ Multi-GPU training Enabling multi-GPU training with NeMo is easy: - (1) First set `placement` to `nemo.core.DeviceType.AllGpu` in NeuralModuleFactory and in your Neural Modules + (1) First set `placement` to `nemo.core.DeviceType.AllGpu` in NeuralModuleFactory (2) Have your script accept 'local_rank' argument and do not set it yourself: `parser.add_argument("--local_rank", default=None, type=int)` (3) Use `torch.distributed.launch` package to run your script like this (replace with number of gpus): .. 
code-block:: bash - python -m torch.distributed.launch --nproc_per_node= /examples/asr/jasper.py ... + python -m torch.distributed.launch --nproc_per_node= /examples/asr/quartznet.py ... Large Training Example ~~~~~~~~~~~~~~~~~~~~~~ -Please refer to the `/examples/asr/jasper.py` for comprehensive example. It builds one train DAG and up to three validation DAGs to evaluate on different datasets. +Please refer to the `/examples/asr/quartznet.py` for comprehensive example. It builds one train DAG +and multiple validation DAGs. Each validation DAG shares the same model and parameters as the training DAG and can +be used to evaluate a different evaluation dataset. Assuming, you are working with Volta-based DGX, you can run training like this: .. code-block:: bash - python -m torch.distributed.launch --nproc_per_node= /examples/asr/jasper.py --batch_size=64 --num_epochs=100 --lr=0.015 --warmup_steps=8000 --weight_decay=0.001 --train_dataset=/manifests/librivox-train-all.json --eval_datasets /manifests/librivox-dev-clean.json /manifests/librivox-dev-other.json --model_config=/nemo/examples/asr/configs/quartznet15x5.yaml --exp_name=MyLARGE-ASR-EXPERIMENT + python -m torch.distributed.launch --nproc_per_node= /examples/asr/quartznet.py --batch_size=64 --num_epochs=100 --lr=0.015 --warmup_steps=8000 --weight_decay=0.001 --train_dataset=/manifests/librivox-train-all.json --eval_datasets /manifests/librivox-dev-clean.json /manifests/librivox-dev-other.json --model_config=/nemo/examples/asr/configs/quartznet15x5.yaml --exp_name=MyLARGE-ASR-EXPERIMENT The command above should trigger 8-GPU training with mixed precision. In the command above various manifests (.json) files are various datasets. Substitute them with the ones containing your data. .. tip:: You can pass several manifests (comma-separated) to train on a combined dataset like this: `--train_manifest=/manifests/librivox-train-all.json,/manifests/librivox-train-all-sp10pcnt.json,/manifests/cv/validated.json`. Here it combines 3 data sets: LibriSpeech, Mozilla Common Voice and LibriSpeech speed perturbed. +.. _fine-tune: Fine-tuning ----------- Training time can be dramatically reduced if starting from a good pre-trained model: - (1) Obtain pre-trained model (jasper_encoder, jasper_decoder and configuration files) `from here `_. - (2) load pre-trained weights right after you've instantiated your jasper_encoder and jasper_decoder, like this: + (1) Obtain a pre-trained model (encoder, decoder and configuration files) `from here `_. + (2) load pre-trained weights right after you've instantiated your encoder and decoder, like this: .. code-block:: python - jasper_encoder.restore_from("/15x5SEP/JasperEncoder-STEP-247400.pt") - jasper_decoder.restore_from("/15x5SEP/JasperDecoderForCTC-STEP-247400.pt") + encoder.restore_from("/15x5SEP/JasperEncoder-STEP-247400.pt") + decoder.restore_from("/15x5SEP/JasperDecoderForCTC-STEP-247400.pt") # in case of distributed training add args.local_rank - jasper_decoder.restore_from("/15x5SEP/JasperDecoderForCTC-STEP-247400.pt", args.local_rank) + decoder.restore_from("/15x5SEP/JasperDecoderForCTC-STEP-247400.pt", args.local_rank) .. tip:: When fine-tuning, use smaller learning rate. @@ -301,7 +311,7 @@ Training time can be dramatically reduced if starting from a good pre-trained mo Evaluation ---------- -First download pre-trained model (jasper_encoder, jasper_decoder and configuration files) `from here `_ into ``. We will use this pre-trained model to measure WER on LibriSpeech dev-clean dataset. 
+First download the pre-trained model (encoder, decoder and configuration files) `from here `_ into ``. We will use this pre-trained model to measure WER on the LibriSpeech dev-clean dataset.
 
 .. code-block:: bash
 
@@ -331,6 +341,40 @@ Perform the following steps:
 
     python /examples/asr/jasper_eval.py --model_config=/examples/asr/configs/quartznet15x5.yaml --eval_datasets "/dev_clean.json" --load_dir= --lm_path=
 
+
+Using and Converting to Tarred Datasets
+---------------------------------------
+
+If you are training on a distributed cluster, you may want to avoid a dataset consisting of many small files and instead perform batched reads from tarballs.
+In this case, you can use the ``TarredAudioToTextDataLayer`` to load your data.
+
+The ``TarredAudioToTextDataLayer`` takes in an ``audio_tar_filepaths`` argument, which specifies the path(s) to the tarballs that contain the audio files, and a ``manifest_filepath`` argument that should contain the transcripts and durations corresponding to those files (with a unique WAV basename per entry).
+The ``audio_tar_filepaths`` argument can be in the form of a string, either containing a path to a single tarball or braceexpand-able to multiple paths, or a list of paths.
+Note that the data layer's size (via ``len``) is set by the number of entries of the manifest, rather than the number of files across all tarballs.
+
+This DataLayer uses `WebDataset `_ to read the tarred audio files.
+Since reads are performed sequentially, shuffling is done with a buffer which can be specified by the argument ``shuffle_n``.
+
+Please see the ``TarredAudioToTextDataLayer`` `documentation `_ and the WebDataset documentation for more details.
+
+.. note::
+
+    If using ``torch.distributed`` processes, the ``TarredAudioToTextDataLayer`` will automatically partition the audio tarballs across workers.
+    As such, if you are training on `n` workers, please make sure to divide your WAV files evenly across a number of tarballs that is divisible by `n`.
+
+Conversion from an Existing Dataset to Tarred Dataset
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+If you already have an ASR dataset that you would like to convert to one that is compatible with the ``TarredAudioToTextDataLayer``, you can use ``scripts/convert_to_tarred_audio_dataset.py``.
+
+This script takes a few arguments:
+
+* ``manifest_path`` (required): The path to your existing dataset's manifest file.
+* ``target_dir``: The directory where the tarballs and new manifest will be written. If none is given, defaults to ``./tarred``.
+* ``num_shards``: The number of shards (tarballs) to create. If using multiple workers for training, set this to be a multiple of the number of workers you have. Defaults to 1.
+* ``shuffle``: Setting this flag will shuffle the entries in your original manifest before creating the new dataset. You may want to do this if your original dataset is ordered, since the ``TarredAudioToTextDataLayer`` cannot shuffle the whole dataset (see ``shuffle_n``).
+
+
 Kaldi Compatibility
 -------------------
 
@@ -345,11 +389,11 @@ Of course, you will also need the .ark files that contain the audio data in the
 
 To load your Kaldi-formatted data, you can simply use the ``KaldiFeatureDataLayer`` instead of the ``AudioToTextDataLayer``. The ``KaldiFeatureDataLayer`` takes in an argument ``kaldi_dir`` instead of a ``manifest_filepath``, and this argument should be set to the directory that contains the files mentioned above. 
-See `the documentation `_ for more detailed information about the arguments to this data layer. +See `the documentation `_ for more detailed information about the arguments to this data layer. .. note:: - If you are switching to a ``KaldiFeatureDataLayer``, be sure to set any ``feat_in`` parameters to correctly reflect the dimensionality of your Kaldi features, such as in the Jasper encoder. Additionally, your data is likely already preprocessed (e.g. into MFCC format), in which case you can leave out any audio preprocessors like the ``AudioToMelSpectrogramPreprocessor``. + If you are switching to a ``KaldiFeatureDataLayer``, be sure to set any ``feat_in`` parameters to correctly reflect the dimensionality of your Kaldi features, such as in the encoder. Additionally, your data is likely already preprocessed (e.g. into MFCC format), in which case you can leave out any audio preprocessors like the ``AudioToMelSpectrogramPreprocessor``. References ---------- diff --git a/docs/sources/source/collections/nemo_nlp.rst b/docs/sources/source/collections/nemo_nlp.rst index 8c862d84fb66..5a77e055cf90 100644 --- a/docs/sources/source/collections/nemo_nlp.rst +++ b/docs/sources/source/collections/nemo_nlp.rst @@ -86,14 +86,14 @@ NLP Neural Modules :undoc-members: :show-inheritance: :exclude-members: forward - -.. automodule:: nemo.collections.nlp.nm.trainables.dialogue_state_tracking.state_tracking_trade_nm + +.. automodule:: nemo.collections.nlp.nm.trainables.dialogue_state_tracking.trade_generator_nm :members: :undoc-members: :show-inheritance: :exclude-members: forward -.. automodule:: nemo.collections.nlp.nm.trainables.joint_intent_slot.joint_intent_slot_nm +.. automodule:: nemo.collections.nlp.nm.trainables.joint_intent_slot.joint_intent_slot_classifier_nm :members: :undoc-members: :show-inheritance: @@ -107,3 +107,15 @@ NLP Hugging Face Neural Modules :undoc-members: :show-inheritance: :exclude-members: forward + +.. automodule:: nemo.collections.nlp.nm.trainables.common.huggingface.albert_nm + :members: + :undoc-members: + :show-inheritance: + :exclude-members: forward + +.. automodule:: nemo.collections.nlp.nm.trainables.common.huggingface.roberta_nm + :members: + :undoc-members: + :show-inheritance: + :exclude-members: forward diff --git a/docs/sources/source/conf.py b/docs/sources/source/conf.py index 8caeaaede9b5..18065b8a4049 100644 --- a/docs/sources/source/conf.py +++ b/docs/sources/source/conf.py @@ -15,39 +15,16 @@ import os import sys -from unittest.mock import MagicMock # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -# -import nemo - -sys.path.insert(0, os.path.abspath(".")) -sys.path.insert(0, os.path.abspath("../../../")) - -# ---- Mocking up the classes. ----- -MOCK_CLASSES = {'Dataset': 'torch.utils.data', 'Module': 'torch.nn'} - - -class Mock(MagicMock): - @classmethod - def __getattr__(cls, name): - if name in MOCK_CLASSES: - # return object # Sphinx renders object in base classes - return type(name, (object,), {'__module__': MOCK_CLASSES[name]}) - elif name == '__file__': - # Sphinx tries to find source code, but - # doesn't matter because it's mocked - return "FOO" - elif name == '__loader__': - return "BAR" - return MagicMock() +sys.path.insert(0, os.path.abspath("../../..")) +sys.path.insert(0, os.path.abspath(os.path.join("../../..", "nemo"))) -# ---- Mocking up the python modules. 
----- -MOCK_MODULES = [ +autodoc_mock_imports = [ 'torch', 'torch.nn', 'torch.utils', @@ -57,14 +34,33 @@ def __getattr__(cls, name): 'torchvision', 'torchvision.models', 'torchtext', + 'torch_stft', 'h5py', 'kaldi_io', 'transformers', 'transformers.tokenization_bert', + 'apex', + 'ruamel', + 'frozendict', + 'inflect', + 'unidecode', + 'librosa', + 'soundfile', + 'sentencepiece', + 'youtokentome', + 'megatron-lm', + 'numpy', + 'dateutil', + 'wget', + 'scipy', + 'pandas', + 'matplotlib', + 'sklearn', + 'braceexpand', + 'webdataset', + 'tqdm', ] -sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES) - # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. @@ -103,17 +99,21 @@ def __getattr__(cls, name): # General information about the project. project = "nemo" -copyright = "2018-2019, NVIDIA" +copyright = "2018-2020, NVIDIA" author = "NVIDIA" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. -# + +from package_info import __version__ + # The short X.Y version. -version = "0.9.0" +# version = "0.10.0" +version = __version__ # The full version, including alpha/beta/rc tags. -release = "0.9.0" +# release = "0.9.0" +release = __version__ # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/docs/sources/source/index.rst b/docs/sources/source/index.rst index c5cfb6ae5bdf..f0b68255fab4 100644 --- a/docs/sources/source/index.rst +++ b/docs/sources/source/index.rst @@ -1,15 +1,16 @@ -NVIDIA Neural Modules Developer Guide -===================================== +NVIDIA NeMo Developer Guide +=========================== .. toctree:: :hidden: :maxdepth: 2 Introduction - installation tutorials/intro training asr/intro + speaker_recognition/intro + speech_command/intro nlp/intro tts/intro collections/modules @@ -17,7 +18,7 @@ NVIDIA Neural Modules Developer Guide chinese/intro -Neural Modules (NeMo) is a framework-agnostic toolkit for building AI applications powered by Neural Modules. Current support is for PyTorch framework. +NeMo is a framework-agnostic toolkit for building AI applications powered by Neural Modules. Current support is for PyTorch framework. A "Neural Module" is a block of code that computes a set of outputs from a set of inputs. @@ -26,7 +27,8 @@ Neural Modules’ inputs and outputs have Neural Type for semantic checking. An application built with NeMo is a Directed Acyclic Graph (DAG) of connected modules enabling researchers to define and build new speech and nlp networks easily through API Compatible modules. -**Introduction** +Introduction +------------ See this video for a walk-through. @@ -37,7 +39,8 @@ See this video for a walk-through. -**Core Concepts and Features** +Core Concepts and Features +-------------------------- * `NeuralModule` class - represents and implements a neural module. * `NmTensor` - represents activations which flow between neural modules' ports. @@ -47,79 +50,61 @@ See this video for a walk-through. 
* `Collections` - NeMo comes with collections - related group of modules such as `nemo_asr` (for Speech Recognition) and `nemo_nlp` for NLP -**Requirements** +Requirements +------------ 1) Python 3.6 or 3.7 -2) PyTorch 1.2 or later with GPU support -3) NVIDIA APEX: https://github.com/NVIDIA/apex +2) PyTorch 1.4 or later with GPU support +3) (optional for best performance) NVIDIA APEX: https://github.com/NVIDIA/apex +.. _installation: -**Getting started** +Getting started +--------------- You can use NVIDIA `NGC NeMo container `_ for the latest NeMo release and all dependencies. .. code-block:: bash # Pull the docker - docker pull nvcr.io/nvidia/nemo:v0.9 - - # Do one of the two following commands - # Run Docker for docker version <19.03 - nvidia-docker run -it --rm --shm-size=8g -p 8888:8888 -p 6006:6006 --ulimit memlock=-1 --ulimit stack=67108864 nvcr.io/nvidia/nemo:v0.9 + docker pull nvcr.io/nvidia/nemo:v0.10 # Run Docker for docker version >=19.03 - docker run -it --rm --gpus all --shm-size=8g -p 8888:8888 -p 6006:6006 --ulimit memlock=-1 --ulimit stack=67108864 nvcr.io/nvidia/nemo:v0.9 + docker run --gpus all -it --rm -v :/NeMo --shm-size=8g -p 8888:8888 -p 6006:6006 --ulimit memlock=-1 --ulimit stack=67108864 nvcr.io/nvidia/nemo:v0.10 and begin using NeMo immediately. If you have all requirements installed (or are using `NGC PyTorch container `_ ), -then you can simply use pip to install the latest released version (**currently 0.9.0**) of NeMo and its collections: +then you can simply use pip to install the latest released version (**currently 0.10.1**) of NeMo and its collections: .. code-block:: bash - pip install nemo-toolkit # installs NeMo Core - pip install nemo-asr # installs NeMo ASR collection - pip install nemo-nlp # installs NeMo NLP collection - pip install nemo-tts # installs NeMo TTS collection + pip install nemo_toolkit[all] # Installs NeMo Core and all collections including nemo_asr, nemo_nlp, nemo_tts -**Tutorials** +Tutorials +--------- -* `Speech recognition `_ -* `Natural language processing `_ -* `Speech synthesis `_ +* :ref:`Speech recognition ` +* :ref:`Natural language processing ` +* :ref:`Speech synthesis ` -**Installing From Github** +Installing From Github +---------------------- If you prefer to use NeMo's latest development version (from GitHub) follow the steps below: -*Note*: For step 2 and 3, if you want to use NeMo in development mode, use: ``pip install -e .`` instead of ``pip install .`` - 1) Clone the repository ``git clone https://github.com/NVIDIA/NeMo.git`` -2) Go to NeMo folder and install the toolkit: - -.. code-block:: bash - - cd NeMo/nemo - pip install . - -3) Install the collection(s) you want. +2) Go to NeMo folder and install the toolkit and collections: .. code-block:: bash - # Install the ASR collection from collections/nemo_asr - apt-get install libsndfile1 - cd NeMo/collections/nemo_asr - pip install . - - # Install the NLP collection from collections/nemo_nlp - cd NeMo/collections/nemo_nlp - pip install . + ./reinstall.sh - # Install the TTS collection from collections/nemo_tts - cd NeMo/collections/nemo_tts - pip install . +.. note:: + reinstall.sh install NeMo in development mode. 
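+As a quick sanity check after installation (our own suggestion, not an official test), you can try importing the toolkit and one of its collections; this assumes the package exposes ``__version__`` via ``package_info``, as recent releases do:
+
+.. code-block:: python
+
+    # minimal post-install smoke test
+    import nemo
+    import nemo.collections.asr as nemo_asr  # any installed collection works here
+
+    print(nemo.__version__)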
-**Unittests** +Unittests +--------- This command runs unittests: @@ -128,7 +113,8 @@ This command runs unittests: ./reinstall.sh python -m unittest tests/*.py -**Building Docker Container** +Building Docker Container +------------------------- The NeMo Docker image requires Docker Buildx which is included in Docker 19.03 and layer. To build a custom NeMo Docker image, run @@ -151,7 +137,7 @@ container at runtime. # launch the container, mapping local nemo into it cd - docker run -it --rm --gpus all -v $(pwd):/workspace/nemo --shm-size=8g -p 8888:8888 -p 6006:6006 --ulimit memlock=-1 --ulimit stack=67108864 nvcr.io/nvidia/nemo:v0.9 + docker run -it --rm --gpus all -v $(pwd):/workspace/nemo --shm-size=8g -p 8888:8888 -p 6006:6006 --ulimit memlock=-1 --ulimit stack=67108864 nvcr.io/nvidia/nemo:v0.10 # install in development mode ./reinstall.sh diff --git a/docs/sources/source/installation.rst b/docs/sources/source/installation.rst deleted file mode 100644 index eb69382a0b98..000000000000 --- a/docs/sources/source/installation.rst +++ /dev/null @@ -1,77 +0,0 @@ -.. _installation: - -Installation -============ - -**Requirements** - -1) Python 3.6 or 3.7 -2) `CUDA `_ >= 10.0 -3) `cuDNN `_ >= 7.6 -4) `APEX `_ -5) PyTorch >=1.2 -6) (Recommended for distributed training) `NCCL `_ >= 2.4 - -.. tip:: Instead of installing all requirements. They are all automatically included - in the `NVIDIA's PyTorch container `_ . - You can pull it like so: `docker pull nvcr.io/nvidia/pytorch:19.11-py3` - -**Installing NeMo and Collections** - -*Note*: For step 2 and 3, if you want to use NeMo in development mode, use: ``pip install -e .`` instead of ``pip install .`` - -1) Clone the repository: - -.. code-block:: bash - - git clone https://github.com/NVIDIA/nemo - -2) Go to ``nemo`` folder and install NeMo Core: - -.. code-block:: bash - - cd nemo - pip install . - -3) Install collections - - a) To install the ASR collection from ``collections/nemo_asr``: - - .. code-block:: bash - - cd ../collections/nemo_asr - sudo apt-get install libsndfile1 && pip install . - - - b) To install the NLP collection from ``collections/nemo_nlp``: - - .. code-block:: bash - - cd ../nemo_nlp - pip install . - - c) To install the LPR collection from ``collections/nemo_simple_gan``: - - .. code-block:: bash - - cd ../nemo_simple_gan - pip install . - - d) To install the TTS collection from ``collections/nemo_tts``: - - .. code-block:: bash - - cd ../nemo_tts - pip install . - - -4) Run unittests from the nemo directory to validate installation: - -.. code-block:: bash - - python -m unittest tests/*.py - -All tests should pass without errors. - -5) Go to ``examples/start_here`` to get started with few simple examples - diff --git a/docs/sources/source/nlp/asr-improvement.rst b/docs/sources/source/nlp/asr-improvement.rst index d8cb99de02fd..934504b1e71c 100644 --- a/docs/sources/source/nlp/asr-improvement.rst +++ b/docs/sources/source/nlp/asr-improvement.rst @@ -1,5 +1,5 @@ Tutorial -=========================== +======== In this tutorial we will train an ASR postprocessing model to correct mistakes in output of end-to-end speech recognition model. This model method works similar to translation model in contrast to traditional ASR language model rescoring. @@ -34,21 +34,21 @@ Importing parameters from pretrained BERT Both encoder and decoder are initialized with pretrained BERT parameters. Since BERT language model has the same architecture as transformer encoder, there is no need to do anything additional. 
To prepare decoder parameters from pretrained BERT we wrote a script ``get_decoder_params_from_bert.py`` that downloads BERT -parameters from the ``pytorch-transformers`` repository :cite:`asr-imps-huggingface2019transformers` and maps them into a transformer decoder. +parameters from the ``transformers`` repository :cite:`asr-imps-huggingface2019transformers` and maps them into a transformer decoder. Encoder-decoder attention is initialized with self-attention parameters. -The script is located under ``scripts`` directory and accepts 2 arguments: +The script is located under ``examples/nlp/asr_postprocessor/get_decoder_params_from_bert.py`` directory and accepts 2 arguments: * ``--model_name``: e.g. ``bert-base-cased``, ``bert-base-uncased``, etc. * ``--save_to``: a directory where the parameters will be saved .. code-block:: bash - $ python get_decoder_params_from_bert.py --model_name bert-base-uncased + $ python get_decoder_params_from_bert.py --model_name bert-base-uncased --save_to results_dir Neural modules overview -------------------------- -First, as with all models built in NeMo, we instantiate Neural Module Factory which defines 1) backend (PyTorch or TensorFlow), 2) mixed precision optimization level, 3) +First, as with all models built in NeMo, we instantiate Neural Module Factory which defines 1) backend (PyTorch), 2) mixed precision optimization level, 3) local rank of the GPU, and 4) an experiment manager that creates a timestamped folder to store checkpoints, relevant outputs, log files, and TensorBoard graphs. .. code-block:: python @@ -128,14 +128,14 @@ We trained on 8 GPUS. To launch the training in multi-gpu mode run the following .. code-block:: bash - $ python -m torch.distributed.launch --nproc_per_node=8 asr_postprocessor.py --data_dir ../../tests/data/pred_real/ --restore_from ../../scripts/bert-base-uncased_decoder.pt + $ python -m torch.distributed.launch --nproc_per_node=8 asr_postprocessor.py --data_dir data_dir --restore_from bert-base-uncased_decoder.pt References ------------------ -.. bibliography:: nlp_all.bib +.. bibliography:: nlp_all_refs.bib :style: plain :labelprefix: ASR-IMPROVEMENTS :keyprefix: asr-imps- diff --git a/docs/sources/source/nlp/bert_pretraining.rst b/docs/sources/source/nlp/bert_pretraining.rst index 389f6a307466..36caa05959c6 100644 --- a/docs/sources/source/nlp/bert_pretraining.rst +++ b/docs/sources/source/nlp/bert_pretraining.rst @@ -1,16 +1,38 @@ -Pretraining BERT -================ + + +Tutorial +======== In this tutorial, we will build and train a masked language model, either from scratch or from a pretrained BERT model, using the BERT architecture :cite:`nlp-bert-devlin2018bert`. Make sure you have ``nemo`` and ``nemo_nlp`` installed before starting this tutorial. See the :ref:`installation` section for more details. The code used in this tutorial can be found at ``examples/nlp/language_modeling/bert_pretraining.py``. -.. tip:: - Pretrained BERT models can be found at - `https://ngc.nvidia.com/catalog/models/nvidia:bertlargeuncasedfornemo `__ - `https://ngc.nvidia.com/catalog/models/nvidia:bertbaseuncasedfornemo `__ - `https://ngc.nvidia.com/catalog/models/nvidia:bertbasecasedfornemo `__ +.. _pretrained_models_bert: + +Download pretrained models +-------------------------- + +Pretrained BERT models and model configuration files can be downloaded at following links. 
+ +BERT Large models (~330M parameters): +`https://ngc.nvidia.com/catalog/models/nvidia:bertlargeuncasedfornemo `__ + +BERT Base models (~110M parameters): +`https://ngc.nvidia.com/catalog/models/nvidia:bertbaseuncasedfornemo `__ +`https://ngc.nvidia.com/catalog/models/nvidia:bertbasecasedfornemo `__ + +Model results on downstream tasks: + ++---------------------------------------------+--------+--------+--------+--------+--------+--------+ +| | SQuADv1.1 | SQuADv2.0 | GLUE MRPC | ++ +--------+--------+--------+--------+--------+--------+ +| Model | EM | F1 | EM | F1 | Acc | F1 | ++=============================================+========+========+========+========+========+========+ +| BERT-base-uncased | 82.74% | 89.79% | 71.24% | 74.32% | 86.52% | 90.53% | ++---------------------------------------------+--------+--------+--------+--------+--------+--------+ +| BERT-large-uncased | 85.79% | 92.28% | 80.17% | 83.32% | 88.72% | 91.96% | ++---------------------------------------------+--------+--------+--------+--------+--------+--------+ Introduction ------------ @@ -18,6 +40,7 @@ Introduction Creating domain-specific BERT models can be advantageous for a wide range of applications. One notable is domain-specific BERT in a biomedical setting, similar to BioBERT :cite:`nlp-bert-lee2019biobert` and SciBERT :cite:`nlp-bert-beltagy2019scibert`. +.. _bert_data_download: Download Corpus --------------- @@ -25,7 +48,7 @@ Download Corpus The training corpus can be either raw text where data preprocessing is done on the fly or an already preprocessed data set. In the following we will give examples for both. To showcase how to train on raw text data, we will be using the very small WikiText-2 dataset :cite:`nlp-bert-merity2016pointer`. -To download the dataset, run the script ``examples/nlp/scripts/get_wt2.sh``. After downloading and unzipping, the folder should include 3 files that look like this: +To download the dataset, run the script ``examples/nlp/language_modeling/get_wkt2.sh download_dir``. After downloading and unzipping, the folder is located at `download_dir` and should include 3 files that look like this: .. code-block:: bash @@ -34,45 +57,46 @@ To download the dataset, run the script ``examples/nlp/scripts/get_wt2.sh``. Aft valid.txt To train BERT on a Chinese dataset, you may download the Chinese Wikipedia corpus wiki2019zh_. After downloading, you may unzip and -use the script ``examples/nlp/scripts/process_wiki_zh.py`` for preprocessing the raw text. +use the script ``examples/nlp/language_modeling/process_wiki_zh.py`` for preprocessing the raw text. .. _wiki2019zh: https://github.com/brightmart/nlp_chinese_corpus .. code-block:: bash - python examples/nlp/scripts/process_wiki_zh.py --data_dir=./wiki_zh --output_dir=./wiki_zh --min_frequency=3 + python examples/nlp/language_modeling/process_wiki_zh.py --data_dir=./wiki_zh --output_dir=./wiki_zh --min_frequency=3 For already preprocessed data, we will be using a large dataset composed of Wikipedia and BookCorpus as in the original BERT paper. -To download the dataset, go to ``https://github.com/NVIDIA/DeepLearningExamples/blob/master/PyTorch/LanguageModeling/BERT`` -and run the script ``./data/create_datasets_from_start.sh``. 
-The downloaded folder should include a 2 sub folders with the prefix ``lower_case_1_seq_len_128_max_pred_20_masked_lm_prob_0.15_random_seed_12345_dupe_factor_5`` -and ``lower_case_1_seq_len_512_max_pred_80_masked_lm_prob_0.15_random_seed_12345_dupe_factor_5``, containing sequences of length 128 with a maximum of 20 masked tokens +To download the dataset, go to `https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/LanguageModeling/BERT#quick-start-guide `__, +follow steps 1-5 in the Quick-Start-Guide and run the script ``./data/create_datasets_from_start.sh`` inside the docker container. +The downloaded folder should include a 2 sub folders with the prefix `lower_case_[0,1]_seq_len_128_max_pred_20_masked_lm_prob_0.15_random_seed_12345_dupe_factor_5` +and `lower_case_[0,1]_seq_len_512_max_pred_80_masked_lm_prob_0.15_random_seed_12345_dupe_factor_5`, containing sequences of length 128 with a maximum of 20 masked tokens and sequences of length 512 with a maximum of 80 masked tokens respectively. -Create the tokenizer model +Create the tokenizer -------------------------- A tokenizer will be used for data preprocessing and, therefore, is only required for training using raw text data. `BERTPretrainingDataDesc` converts your dataset into the format compatible with `BertPretrainingDataset`. The most computationally intensive step is to tokenize the dataset to create a vocab file and a tokenizer model. -You can also use an available vocab or tokenizer model to skip this step. If you already have a pretrained tokenizer model, -copy it to the ``[data_dir]/bert`` folder under the name ``tokenizer.model`` and the script will skip this step. - -If have an available vocab, say the ``vocab.txt`` file from any `pretrained BERT model`_, copy it to the ``[data_dir]/bert`` folder under the name ``vocab.txt``. +You can also use an available vocab or tokenizer model to skip this step. If you already have a pretrained tokenizer model +copy it to the `[data_dir]/bert` folder under the name `tokenizer.model` and the script will skip this step. -.. _pretrained BERT model: https://github.com/google-research/bert#pre-trained-models +If have an available vocab, such as `vocab.txt` file from any pretrained BERT model, copy it to the `[data_dir]/bert` folder under the name `vocab.txt`. .. code-block:: python + + import nemo.collections.nlp as nemo_nlp - data_desc = nemo_nlp.data.BERTPretrainingDataDesc(args.dataset_name, - args.data_dir, - args.vocab_size, - args.sample_size, - special_tokens, - 'train.txt') + data_desc = nemo_nlp.data.BERTPretrainingDataDesc( + dataset_name=args.dataset_name, + train_data=args.train_data, + eval_data=args.eval_data, + vocab_size=args.vocab_size, + sample_size=args.sample_size, + special_tokens=special_tokens) We need to define our tokenizer. If you'd like to use a custom vocabulary file, we strongly recommend you use our `SentencePieceTokenizer`. Otherwise, if you'll be using a vocabulary file from another pre-trained BERT model, you should use `NemoBertTokenizer`. @@ -83,13 +107,13 @@ To train on a Chinese dataset, you should use `NemoBertTokenizer`. 
# If you're using a custom vocabulary, create your tokenizer like this tokenizer = nemo_nlp.data.SentencePieceTokenizer(model_path="tokenizer.model") - special_tokens = nemo_nlp.utils.MODEL_SPECIAL_TOKENS['bert'] + special_tokens = nemo_nlp.data.get_bert_special_tokens('bert') tokenizer.add_special_tokens(special_tokens) # Otherwise, create your tokenizer like this - tokenizer = nemo_nlp.data.NemoBertTokenizer(vocab_file="vocab.txt") - # or tokenizer = nemo_nlp.data.NemoBertTokenizer(pretrained_model="bert-base-uncased") + # or + tokenizer = nemo_nlp.data.NemoBertTokenizer(vocab_file="vocab.txt") Create the model ---------------- @@ -123,7 +147,10 @@ We also need to define the BERT model that we will be pre-training. Here, you ca max_position_embeddings=args.max_seq_length, hidden_act=args.hidden_act) -If you want to start pre-training from existing BERT checkpoints, specify the checkpoint folder path with the argument ``--load_dir``. + +.. note:: + If you want to start pre-training from existing BERT checkpoints, specify the checkpoint folder path with the argument ``--load_dir``. + The following code will automatically load the checkpoints if they exist and are compatible to the previously defined model .. code-block:: python @@ -131,12 +158,14 @@ The following code will automatically load the checkpoints if they exist and are ckpt_callback = nemo.core.CheckpointCallback(folder=nf.checkpoint_dir, load_from_folder=args.load_dir) -For the full list of BERT model names, check out `nemo_nlp.huggingface.BERT.list_pretrained_models()` - +To initialize the model with already pretrained checkpoints, specify ``pretrained_model_name``. For example, to initialize BERT Base trained on cased Wikipedia and BookCorpus with 12 layers, run + .. code-block:: python bert_model = nemo_nlp.nm.trainables.huggingface.BERT(pretrained_model_name="bert-base-cased") +For the full list of BERT model names, check out `nemo_nlp.nm.trainables.huggingface.BERT.list_pretrained_models()`. + Next, we will define our classifier and loss functions. We will demonstrate how to pre-train with both MLM (masked language model) and NSP (next sentence prediction) losses, but you may observe higher downstream accuracy by only pre-training with MLM loss. @@ -148,7 +177,7 @@ but you may observe higher downstream accuracy by only pre-training with MLM los activation=ACT2FN[args.hidden_act], log_softmax=True) - mlm_loss_fn = nemo_nlp.nm.losses.MaskedLanguageModelingLossNM() + mlm_loss_fn = nemo_nlp.nm.losses.SmoothedCrossEntropyLoss() nsp_classifier = nemo_nlp.nm.trainables.SequenceClassifier( args.hidden_size, @@ -157,13 +186,25 @@ but you may observe higher downstream accuracy by only pre-training with MLM los activation='tanh', log_softmax=False) - nsp_loss_fn = nemo.backends.pytorch.common.CrossEntropyLoss() + nsp_loss_fn = nemo.backends.pytorch.common.CrossEntropyLossNM() + + bert_loss = nemo.backends.pytorch.common.losses.LossAggregatorNM(num_inputs=2) + +Finally we will tie the weights of the encoder embedding layer and the MLM output embedding: + + .. 
code-block:: python - bert_loss = nemo_nlp.nm.losses.LossAggregatorNM(num_inputs=2) + mlm_classifier.tie_weights_with( + bert_model, + weight_names=["mlp.last_linear_layer.weight"], + name2name_and_transform={ + "mlp.last_linear_layer.weight": ("bert.embeddings.word_embeddings.weight", nemo.core.WeightShareTransform.SAME) + }, + ) Then, we create the pipeline from input to output that can be used for both training and evaluation: -For training from raw text use nemo_nlp.BertPretrainingDataLayer, for preprocessed data use nemo_nlp.BertPretrainingPreprocessedDataLayer +For training from raw text use `nemo_nlp.nm.data_layers.BertPretrainingDataLayer`, for preprocessed data use `nemo_nlp.nm.data_layers.BertPretrainingPreprocessedDataLayer` .. code-block:: python @@ -179,7 +220,8 @@ For training from raw text use nemo_nlp.BertPretrainingDataLayer, for preprocess # data_layer = nemo_nlp.BertPretrainingPreprocessedDataLayer( # data_file, # max_predictions_per_seq, - # batch_size, is_training) + # batch_size, + # mode) steps_per_epoch = len(data_layer) // (batch_size * args.num_gpus * args.batches_per_step) @@ -191,7 +233,7 @@ For training from raw text use nemo_nlp.BertPretrainingDataLayer, for preprocess mlm_logits = mlm_classifier(hidden_states=hidden_states) mlm_loss = mlm_loss_fn(logits=mlm_logits, - output_ids=input_data.output_ids, + labels=input_data.output_ids, output_mask=input_data.output_mask) nsp_logits = nsp_classifier(hidden_states=hidden_states) @@ -209,48 +251,76 @@ For training from raw text use nemo_nlp.BertPretrainingDataLayer, for preprocess mask_probability=args.mask_probability, short_seq_prob=args.short_seq_prob, batch_size=args.batch_size, - batches_per_step=args.batches_per_step) + batches_per_step=args.batches_per_step, + mode="train") # for preprocessed data # train_loss, _, _, steps_per_epoch = create_pipeline( - # data_file=args.data_dir, + # data_file=args.train_data, # preprocessed_data=True, # max_predictions_per_seq=args.max_predictions_per_seq, - # training=True, # batch_size=args.batch_size, - # batches_per_step=args.batches_per_step) - - eval_loss, eval_tensors, _ = create_pipeline(data_desc.eval_file, - args.max_seq_length, - args.mask_probability, - args.eval_batch_size) + # batches_per_step=args.batches_per_step, + # mode="train") + + eval_loss, _, _, _ = create_pipeline( + data_file=data_desc.eval_file, + preprocessed_data=False, + max_seq_length=args.max_seq_length, + mask_probability=args.mask_probability, + short_seq_prob=args.short_seq_prob, + batch_size=args.batch_size, + batches_per_step=args.batches_per_step, + mode="eval") + + # for preprocessed data + # eval_loss, eval_mlm_loss, eval_nsp_loss, _ = create_pipeline( + # data_file=args.eval_data, + # preprocessed_data=True, + # max_predictions_per_seq=args.max_predictions_per_seq, + # batch_size=args.batch_size, + # batches_per_step=args.batches_per_step, + # mode="eval") -Next, we define necessary callbacks: +Run the model +---------------- -1. `SimpleLossLoggerCallback`: tracking loss during training -2. `EvaluatorCallback`: tracking metrics during evaluation at set intervals -3. `CheckpointCallback`: saving model checkpoints at set intervals +Define your learning rate policy .. code-block:: python - train_callback = nemo.core.SimpleLossLoggerCallback(...) - eval_callback = nemo.core.EvaluatorCallback(...) - ckpt_callback = nemo.core.CheckpointCallback(...) + lr_policy_fn = get_lr_policy(args.lr_policy, + total_steps=args.num_iters, + warmup_ratio=args.lr_warmup_proportion) -.. 
tip:: + # if you are training on raw text data, you have to use this alternative to set the number of training epochs + lr_policy_fn = get_lr_policy(args.lr_policy, + total_steps=args.num_epochs * steps_per_epoch, + warmup_ratio=args.lr_warmup_proportion) - Tensorboard_ is a great debugging tool. It's not a requirement for this tutorial, but if you'd like to use it, you should install tensorboardX_ and run the following command during pre-training: +Next, we define necessary callbacks: - .. code-block:: bash +1. `SimpleLossLoggerCallback`: tracking loss during training +2. `EvaluatorCallback`: tracking metrics during evaluation at set intervals +3. `CheckpointCallback`: saving model checkpoints at set intervals - tensorboard --logdir bert_pretraining_tb + .. code-block:: python -.. _Tensorboard: https://www.tensorflow.org/tensorboard -.. _tensorboardX: https://github.com/lanpa/tensorboardX + train_callback = nemo.core.SimpleLossLoggerCallback(tensors=[train_loss], + print_func=lambda x: logging.info("Loss: {:.3f}".format(x[0].item())), + step_freq=args.train_step_freq) + eval_callback = nemo.core.EvaluatorCallback(eval_tensors=[eval_loss], + user_iter_callback=nemo_nlp.callbacks.lm_bert_callback.eval_iter_callback, + user_epochs_done_callback=nemo_nlp.callbacks.lm_bert_callback.eval_epochs_done_callback, + eval_step=args.eval_step_freq) + ckpt_callback = nemo.core.CheckpointCallback(folder=nf.checkpoint_dir, + epoch_freq=args.save_epoch_freq, + load_from_folder=args.load_dir, + step_freq=args.save_step_freq) -We also recommend you export your model's parameters to a config file. This makes it easier to load your BERT model into NeMo later, as explained in our NER tutorial. +We recommend you export your model's parameters to a config file. This makes it easier to load your BERT model into NeMo later, as explained in our Named Entity Recognition :ref:`ner_tutorial` tutorial. .. code-block:: python @@ -263,15 +333,6 @@ Finally, you should define your optimizer, and start training! .. code-block:: python - lr_policy_fn = get_lr_policy(args.lr_policy, - total_steps=args.num_epochs * steps_per_epoch, - warmup_ratio=args.lr_warmup_proportion) - - # if you are training is based on number of iterations rather than number of epochs, use - # lr_policy_fn = get_lr_policy(args.lr_policy, - # total_steps=args.total_iterations_per_gpu, - # warmup_ratio=args.lr_warmup_proportion) - nf.train(tensors_to_optimize=[train_loss], lr_policy=lr_policy_fn, callbacks=[train_callback, eval_callback, ckpt_callback], @@ -282,10 +343,64 @@ Finally, you should define your optimizer, and start training! "betas": (args.beta1, args.beta2), "weight_decay": args.weight_decay}) + +How to use the training script +-------------------------------- + +You can find the example training script at ``examples/nlp/language_modeling/bert_pretraining.py``. + +For single GPU training, the script can be started with + +.. code-block:: bash + + cd examples/nlp/language_modeling + python bert_pretraining.py --config_file bert-config.json [args] + +The BERT configuration files can be found in the NGC model repositories, see :ref:`pretrained_models_bert`. + + +For multi-GPU training with ``x`` GPUs, the script can be started with + +.. code-block:: bash + + cd examples/nlp/language_modeling + python -m torch.distributed.launch --nproc_per_node=x bert_pretraining.py --num_gpus=x [args] + + +If you are running the model on raw text data, please remember to add the argument ``data_text`` to the python command. + ..
code-block:: bash + + python bert_pretraining.py [args] data_text [args] + +Similarly, to run the model on already preprocessed data add the argument ``data_preprocessed`` to the python command. + +.. code-block:: bash + + python bert_pretraining.py [args] data_preprocessed [args] + +.. note:: + By default, the script assumes ``data_preprocessed`` as input mode. + +.. note:: + For downloading or preprocessing data offline please refer to :ref:`bert_data_download`. + + +.. tip:: + + Tensorboard_ is a great debugging tool. It's not a requirement for this tutorial, but if you'd like to use it, you should install tensorboardX_ and run the following command during pre-training: + + .. code-block:: bash + + tensorboard --logdir outputs/bert_lm/tensorboard + +.. _Tensorboard: https://www.tensorflow.org/tensorboard +.. _tensorboardX: https://github.com/lanpa/tensorboardX + References ---------- -.. bibliography:: nlp_all.bib +.. bibliography:: nlp_all_refs.bib :style: plain :labelprefix: NLP-BERT-PRETRAINING :keyprefix: nlp-bert- diff --git a/docs/sources/source/nlp/dialogue_state_tracking.rst b/docs/sources/source/nlp/dialogue_state_tracking.rst new file mode 100644 index 000000000000..871255e2fec6 --- /dev/null +++ b/docs/sources/source/nlp/dialogue_state_tracking.rst @@ -0,0 +1,516 @@ +TRADE Tutorial +============== + +Introduction +------------ + +The goal of **Dialog State Tracking (DST)** :cite:`nlp-dst-henderson2015machine` \ +is to build a representation of the status of the ongoing conversation \ +being a sequence of utterances exchanged between dialog participants. \ +In another words, the goal of DST system is to capture user goals and intentions and encode them as a set of \ +**slots** along with the corresponding **values**. DST is considered an important module for most of the goal-oriented dialogue systems. + + +.. figure:: dst_multiwoz_example.png + + Fig. 1: An exemplary, multi-domain dialog along with the associated state tracking (source: \ + :cite:`nlp-dst-wu2019transferable`) + + +In this tutorial we will focus on a multi-domain dialogue MultiWOZ dataset :cite:`nlp-dst-budzianowski2018multiwoz` \ +and show how one can train a TRADE model :cite:`nlp-dst-wu2019transferable`, \ +being one of the recent, state of the art models. \ +**Multi-domain** setting introduces several challanges, with the most important coming from the need for \ +**multi-turn mapping**. In a **single-turn mapping** scenario the (**domain**, **slot**, **value**) triplet can be \ +inferred from a single turn. In multi-turn this assumption does not hold and the DST system must infer those from \ +multiple turns, possibly spanning over several different domains. + + + + +The MultiWOZ Dataset +-------------------- + +The Multi-Domain Wizard-of-Oz dataset (`MultiWOZ`_) is a collection of human-to-human conversations spanning over \ +7 distinct domains and containing over 10,000 dialogues. +The original MultiWOZ 2.0 dataset was introduced in :cite:`nlp-dst-budzianowski2018multiwoz`. +However, in this tutorial we will utilize MultiWOZ 2.1 :cite:`nlp-dst-eric2019multiwoz`, which is an updated version of MultiWOZ 2.0. They have fixed several issues with the original dataset including errors in states, utterances, value canonicalization etc.). Our model can also get trained on MultiWOZ 2.0. + +.. _MultiWOZ: https://www.repository.cam.ac.uk/handle/1810/294507 + +The MultiWOZ dataset covers the following domains: + 1. restaurant + 2. hotel + 3. attraction + 4. taxi + 5. train + 6. hospital + 7. 
police + +As well as the following slots: + * inform (∗) + * address (∗) + * postcode (∗) + * phone (∗) + * name (1234) + * no of choices (1235) + * area (123) + * pricerange (123) + * type (123) + * internet (2) + * parking (2) + * stars (2) + * open hours (3) + * departure (45) + * destination (45) + * leave after (45) + * arrive by (45) + * no of people (1235) + * reference no. (1235) + * trainID (5) + * ticket price (5) + * travel time (5) + * department (7) + * day (1235) + * no of days (123). + + +Please note that some of the actions and slots are associated with particular domain(s), whereas some are universal, \ +i.e. domain independent. The latter ones are denoted with (∗). + + +MultiWOZ offers 10,438 dialogues, with 115,434 turns in total. \ +Dialogues are generally classified into single and multi-domain dialogues. \ +Dialogue length distribution is varying from 1 to 31, with around 70% of dialogues have more than 10 turns. \ +The average number of turns are 8.93 and 15.39 for single and multi-domain dialogues. \ + +Each dialogue consists of a goal, multiple user and system utterances as well as a belief state and set of dialogue \ +acts with slots per turn. Additionally, each dialog is supported with a task description. \ +Moreover, it contains both system and user dialogue act annotations (the latter introduced in MultiWOZ 2.1). + + +TRADE Model +----------- + +The **TRA**\nsferable **D**\ialogue stat\ **E** generator (TRADE) :cite:`nlp-dst-wu2019transferable` is a model \ +designed specially for the multi-domain \ +task-oriented dialogue state tracking problem. \ +The model generates dialogue states from utterances and history. It learns embeddings for domains and slots, and also \ +benefits from copy mechanism to facilitate knowledge transfer between domains. It enables the model to predict +\(**domain**, **slot**, **value**) triplets not encountered during training in a given domain. + + +.. figure:: dst_trade_architecture.png + + Fig. 2: Architecture of the TRADE model (source: :cite:`nlp-dst-wu2019transferable`) + +The model is composed of three main components: + + * **Utterance Encoder**, + * **Slot Gate**, and + * **State Generator**. + +The **utterance encoder** is a bi-directional Gated Recurrent Unit (GRU), returning both \ +context words and and an aggregated context vector encoding the whole dialogue history. + +The **state generator** also uses GRU to predict the value for each(domain, slot) pair. Generator employ a soft-gated \ +pointer-generator copying to combine a **distribution over the vocabulary** and a **distribution over the dialogue \ +history** into a single output distribution. + +Finally, the **slot gate** is a simple classifier that maps a context vector taken from the encoder hidden states \ +to a probability distribution over three classes: *ptr*, *none*, and *dontcare*. + +Data Pre-processing +------------------- + +First, you need to download `MULTIWOZ2.1.zip` from the `MultiWOZ2.1`_ project website. It contains the data for \ +MultiWOZ 2.1 dataset. Alternatively, you can download `MULTIWOZ2.zip` compressed file from `MultiWOZ2.0`_ which \ +contain the older version of this dataset. + +.. _MultiWOZ2.1: https://www.repository.cam.ac.uk/handle/1810/294507 + +.. 
_MultiWOZ2.0: https://www.repository.cam.ac.uk/handle/1810/280608 + +Next, we need to preprocess and reformat the dataset, which will result in the division of the data into three splits: + + * training split (8242 dialogs in the ``train_dials.json`` file) + * development/validation split (1000 dialogs in the ``dev_dials.json`` file) + * test split (999 dialogs in the ``test_dials.json`` file) + +In order to preprocess the MultiWOZ dataset you can use the provided `process_multiwoz.py`_ script: + +.. _process_multiwoz.py: https://github.com/NVIDIA/NeMo/tree/master/examples/nlp/dialogue_state_tracking/data/process_multiwoz.py + +.. code-block:: bash + + cd examples/nlp/dialogue_state_tracking/data/ + python process_multiwoz.py \ + --source_data_dir \ + --target_data_dir + +.. note:: + The `--source_data_dir` argument specifies the folder into which you have copied and extracted the data. \ + The script will store the processed dataset in the folder given by `--target_data_dir`. \ + Both MultiWOZ 2.0 and MultiWOZ 2.1 datasets can be processed with the same script. + + +Building the NeMo Graph +----------------------- + +The NeMo training graph consists of the following six modules, including the data layer, encoder, decoder, and losses: + + * data_layer (:class:`nemo.collections.nlp.nm.data_layers.MultiWOZDataLayer`) + * encoder (:class:`nemo.backends.pytorch.common.EncoderRNN`) + * decoder (:class:`nemo.collections.nlp.nm.trainables.TRADEGenerator`) + * gate_loss_fn (:class:`nemo.backends.pytorch.common.losses.CrossEntropyLossNM`) + * ptr_loss_fn (:class:`nemo.collections.nlp.nm.losses.MaskedLogLoss`) + * total_loss_fn (:class:`nemo.collections.nlp.nm.losses.LossAggregatorNM`) + +Training +-------- + +In order to train an instance of the TRADE model on the MultiWOZ dataset and evaluate it on the test data, simply run \ +the `dialogue_state_tracking_trade.py`_ script with default parameters: + +.. _dialogue_state_tracking_trade.py: https://github.com/NVIDIA/NeMo/tree/master/examples/nlp/dialogue_state_tracking/dialogue_state_tracking_trade.py + + +.. code-block:: bash + + cd examples/nlp/dialogue_state_tracking + python dialogue_state_tracking_trade.py \ + --data_dir \ + --work_dir \ + --eval_file_prefix + +You may find the list of parameters in the example file and update them as you see fit. \ +By default, the script trains the model for 10 epochs on a single GPU. \ +The police and hospital domains are excluded from the training by default as they do not exist in the development set. \ +The list of domains can be updated in the example. + + +Evaluating Checkpoints +---------------------- + +By default, a folder named "checkpoints" is created under the working folder specified by `--work_dir` and \ +checkpoints are stored under it. To evaluate a checkpoint on the test or dev set, \ +you may run the same script by passing `--checkpoint_dir` and setting `--num_epochs` to zero to skip the training: + +.. code-block:: bash + + cd examples/nlp/dialogue_state_tracking + python dialogue_state_tracking_trade.py \ + --data_dir \ + --checkpoint_dir \ + --eval_file_prefix \ + --eval_batch_size \ + --num_epochs 0 + +Metrics and Results +------------------- + +In the following table we compare the results achieved by our TRADE model implementation with the results reported \ +in the original paper :cite:`nlp-dst-wu2019transferable`. We trained our models for 10 epochs on a single GPU with 16GB memory.
\ +As the authors reported results on just MultiWOZ 2.0 dataset, we ran the original implementation on MultiWOZ 2.1 dataset \ +and reported those too. + +We used the same parameters as the original implementation. There are some differences between our implementation and \ +the original one. The main difference is that our model does not use pre-trained embeddings which seems not to affect \ +the performance of the model. The other difference is that we used SquareAnnealing for the learning policy instead of \ +fixed learning rate. Additionally, we create the vocabulary just based on the training data while the default for the \ +original one is to create vocabulary from all the data including test and development sets. The main reason behind \ +the improvement of our model in terms of accuracy is utilizing better learning rate policy. When we used fixed \ +learning rate in our implementation, we got similar results as the original one. + +We also did some improvements to the implementation of the model to have faster training. It makes our implementation \ +significantly faster than the original one. Additionally, NeMo supports multi-GPU training which enables even faster \ +training time. It should be noted that learning rate needs to get \ +increased if you want to use multi-GPU training because of having larger batch size. + +Following :cite:`nlp-dst-wu2019transferable`, we used two main metrics to evaluate the model performance: + + * **Joint Goal Accuracy** compares the predicted dialogue states to the ground truth at each dialogue turn, and the + output is considered correct if and only if **all the predicted values exactly match** the ground truth values. + * **Slot Accuracy** independently compares each (domain, slot, value) triplet to its ground truth label. + + ++---------------------------------------------+--------+--------+--------+--------+--------+--------+--------+--------+ +| | MultiWOZ 2.0 | MultiWOZ 2.1 | ++ +--------+--------+--------+--------+--------+--------+--------+--------+ +| | Test |Development | Test |Development | ++ +--------+--------+--------+--------+--------+--------+--------+--------+ +| TRADE implementations | Goal | Slot | Goal | Slot | Goal | Slot | Goal | Slot | ++=============================================+========+========+========+========+========+========+========+========+ +| Original :cite:`nlp-dst-wu2019transferable` | 48.62% | 96.92% | 48.76% | 96.95% | 45.31% | 96.57% | 49.15% | 97.04% | ++---------------------------------------------+--------+--------+--------+--------+--------+--------+--------+--------+ +| NeMo's Implementation of TRADE | 49.78% | 97.06% | 50.44% | 97.15% | 47.77% | 96.82% | 50.85% | 97.21% | ++---------------------------------------------+--------+--------+--------+--------+--------+--------+--------+--------+ + +You may find the checkpoints for the trained models on MultiWOZ 2.0 and MultiWOZ 2.1 datasets on NGC: + + **MultiWOZ 2.0**: https://ngc.nvidia.com/catalog/models/nvidia:trade___dialogue_state_tracker___multiwoz_2_0 + **MultiWOZ 2.1**: https://ngc.nvidia.com/catalog/models/nvidia:trade___dialogue_state_tracker___multiwoz_2_1 + +.. note:: + During training, TRADE model uses an additional supervisory signal, enforcing the Slot Gate to properly \ + predict special values for like **don't care** or **none** for the slots. The `process_multiwoz.py`_ script extracts the additional labels from the dataset and `dialogue_state_tracking_trade.py`_ script reports the **Gating Accuracy** as well. 
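+To make the Joint Goal Accuracy and Slot Accuracy metrics used above concrete, here is a small self-contained sketch (our own illustration, not code from NeMo or the original TRADE implementation) that computes them from per-turn dialogue states given as ``{(domain, slot): value}`` dictionaries; note that this simplified slot accuracy only scores slots present in the ground truth, whereas the published metric also credits correctly predicting empty values for absent slots:
+
+.. code-block:: python
+
+    def joint_goal_and_slot_accuracy(predictions, ground_truths):
+        """predictions / ground_truths: one {(domain, slot): value} dict per turn."""
+        joint_correct, slot_correct, slot_total = 0, 0, 0
+        for pred, gold in zip(predictions, ground_truths):
+            # Joint Goal Accuracy: a turn counts only if every slot value matches.
+            joint_correct += int(pred == gold)
+            # Slot Accuracy: each (domain, slot, value) triplet is scored independently.
+            for key, value in gold.items():
+                slot_total += 1
+                slot_correct += int(pred.get(key) == value)
+        return joint_correct / len(ground_truths), slot_correct / slot_total
+
+    # toy example with two turns
+    gold = [{("hotel", "area"): "north"},
+            {("hotel", "area"): "north", ("hotel", "stars"): "4"}]
+    pred = [{("hotel", "area"): "north"},
+            {("hotel", "area"): "north", ("hotel", "stars"): "3"}]
+    print(joint_goal_and_slot_accuracy(pred, gold))  # (0.5, 0.666...)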
+ + +References ---------- + +.. bibliography:: nlp_all_refs.bib + :style: plain + :labelprefix: NLP-DST + :keyprefix: nlp-dst- + + + +SGD Tutorial +============ + +Introduction +------------ + +A task-oriented dialogue system is a conversational system that can perform a conversation with a user and provide task- (or domain-)specific information. For example, it can book a table in a restaurant or buy a train ticket. +One of the main building blocks of a task-oriented dialogue system is a Dialogue State Tracker (DST). +DST should not only understand what the user just said but also remember what was said before. +DST carries the information about what intent the user has in the conversation, for example, find a restaurant or book a plane ticket, +and what slots along with the corresponding values were mentioned in the dialogue. + + +The Schema-Guided Dialogue Dataset +---------------------------------- + +In this tutorial, we are using the Schema-Guided Dialogue (SGD) dataset :cite:`nlp-sgd-rastogi2019towards` that contains over 16k multi-domain goal-oriented conversations across 16 domains. +The data represents conversations between a user and a virtual assistant, and it can be used for various dialogue management tasks: +intent prediction, slot filling, dialogue state tracking, policy imitation learning, language generation. + +One part of the dataset consists of dialogues that span only a single domain; use ``--task_name sgd_single_domain`` to train and evaluate on such dialogues. Another part consists of dialogues that span multiple domains during a single conversation; +use ``--task_name sgd_multi_domain`` to train and evaluate on the multi-domain task. ``--task_name sgd_all`` will use all available dialogues for training and evaluation. + +An example of the data format can be found `here `_. +Every dialogue contains the following information: + +* **dialogue_id** - a unique dialogue identifier +* **services** - list of services mentioned in the dialogue +* **turns** - a dialogue is comprised of multiple dialogue turns, where a single turn consists of user and system utterance frames. +* **frames** - each frame contains a system or user utterance with its associated annotation. + + * Each **user** frame contains the following information (values in brackets are from the user frame example in Fig. 1; note that some values in the state come from previous dialogue turns): + + * **actions** - a list with the following values: + + * act - user's intent or act (INFORM) + * slot - slot names (price_range) + * values - a list of slot values (moderate) + * canonical_values (optional) - slot values in their canonicalized form as used by the service + + * **service** - service name for the current user utterance (Restaurants_1) + * **slots** - a list of slot spans in the user utterance, only provided for non-categorical slots. Each slot span contains the following fields: + + * slot - non-categorical slot name (city) + * start/exclusive_end - start/end character index of the non-categorical slot value in the current user utterance (113/122) + + * **state** - dialogue state: + + * active_intent - name of an active user intent (FindRestaurants) + * requested_slots - a list of slots requested by the user in the current turn + * slot_values - dictionary of slot name - slot value pairs ({"city": ["Palo Alto"], "cuisine": ["American"], "price_range": ["moderate"]}) + + * Each **system** frame contains the following information (values in brackets are from the system frame example in Fig.
2): + + * **actions** - a list with the following values: + + * act - system act (OFFER) + * slot - slot names (restaurant_name) + * values - a list of slot values (Bird Dog) + * canonical_values (optional) - slot values in their canonicalized form as used by the service + + * **service** - service name for the current turn (Restaurants_1) + * **service_call** (optional) - request sent to the service: + + * method - a name of the intent or function of the service or API being executed (FindRestaurants) + * parameters - a dictionary of slot name -slot value pairs in their canonicalized form ({"city": ["Palo Alto"], "cuisine": ["American"], "price_range": ["moderate"]}) + + * **service_results** - results of a service call: + + {"city": "Palo Alto", + "cuisine": "American", + "has_live_music": "False", + "phone_number": "650-688-2614", + "price_range": "moderate", + "restaurant_name": "Bazille", + "serves_alcohol": "True", + "street_address": "550 Stanford Shopping Center"} + + * **slots** - a list of slot spans in the system utterance, only provided for non-categorical slots. Each slot span contains the following fields: + + * slot - non-categorical slot name (city) + * start/exclusive_end - start/end character index of the non-categorical slot value in the current user utterance (113/122) + +* **speaker** - identifies whether a user or a system is speaking +* **utterance** - user or system utterance + +.. figure:: dst_sgd_user_frame.png + + Fig. 1: An example of a user frame (source: `a user frame from one of the dialogues `_). + + + +.. figure:: dst_sgd_system_frame.png + + Fig. 2: An example of a system frame (source: `a system frame from one of the dialogues `_). + + +To find more details and download the dataset, use `this link `_. + +Baseline model +-------------- + +The SGD dataset for every dataset split (train, dev, test) provides detailed schema files (see `this for an example here `_). +These files contain information about slots supported by every service, possible values for categorical slots, along with the supported intents. +Besides that, the schemas provide a natural language description of the slots, intents, and services; these descriptions are +utilized by the model to get schema embeddings. Thus, before starting the model training, the training script will create schema embeddings. By default the schema embedding generation +will be performed every time you run the training script, to skip the schema generation step for all subsequent training script runs, use ``--no_overwrite_schema_emb_files``. +(see `nlp/data/datasets/sgd_dataset/schema_processor.py `_ for more implementation details). + +.. figure:: dst_sgd_schema_example.png + + Fig. 3: A schema example for a digital wallet service, (source: :cite:`nlp-sgd-rastogi2019towards`) + +Another preprocessing step that could be done once and skipped for all future training runs (if you're not changing anything that could affect it) is the dialogues preprocessing step, i.e. breaking dialogues into dialogue turns and collecting labels and features for a particular turn. Use ``no_overwrite_dial_files`` +to overwrite the generated dialogues to skip this step (see `nemo/collections/nlp/data/datasets/sgd_dataset/data_processor.py `_ for implementation details). + +During training, the Baseline model introduced in :cite:`nlp-sgd-rastogi2019towards` relies on the current user and system utterances and service schemas, compared to the TRADE model that uses all dialogue history. 
+The SGD model is learning to understand and extract from the dialogue the following things: + +- active intent +- requested slots +- categorical slots +- non-categorical slots + +Note that for every abovementioned slot, the model predicts slot status and slot value. Only if the slot status is predicted to be active, the associated slot value is taken into account. + +Model components: + +- **SGDEncoder** - uses a BERT model to encode user utterance. By default, the SGD model uses the pre-trained BERT base cased model from `Hugging Face Transformers `_ to get embedded representations for schema elements and also to encode user utterance. The SGDEncoder returns encoding of the whole user utterance using 'CLS' token and embedded representation of every token in the utterance. +- **SGDDecoder** - returns logits for predicted elements by conditioning on the encoded utterance + +Training +-------- +In order to train the Baseline SGD model on a single domain task and evaluate on its dev and test data, run: + +.. code-block:: bash + + cd examples/nlp/dialogue_state_tracking + python dialogue_state_tracking_sgd.py \ + --task_name sgd_single_domain \ + --data_dir PATH_TO/dstc8-schema-guided-dialogue \ + --schema_embedding_dir PATH_TO/dstc8-schema-guided-dialogue/embeddings/ \ + --dialogues_example_dir PATH_TO/dstc8-schema-guided-dialogue/dialogue_example_dir \ + --eval_dataset dev_test + + +Metrics +------- +Metrics used for automatic evaluation of the model :cite:`nlp-sgd-rastogi2020schema`: + +- **Active Intent Accuracy** - the fraction of user turns for which the active intent has been correctly predicted. +- **Requested Slot F1** - the macro-averaged F1 score for requested slots over all eligible turns. Turns with no requested slots in ground truth and predictions are skipped. +- **Average Goal Accuracy** For each turn, we predict a single value for each slot present in the dialogue state. This is the average accuracy of predicting the value of a slot correctly. +- **Joint Goal Accuracy** - the average accuracy of predicting all slot assignments for a given service in a turn correctly. + +The evaluation results are shown for Seen Services (all services seen during model training), Unseen Services (services not seen during training), and All Services (the combination of Seen and Unseen Services). +Note, during the evaluation, the model first generates predictions and writes them to a file in the same format as the original dialogue files, and then uses these files to compare the predicted dialogue state to the ground truth. 
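+For illustration only (this is not the official SGD evaluation code), the two simpler metrics above could be computed along these lines, with the per-turn active intent given as a string and the requested slots given as one set of slot names per turn:
+
+.. code-block:: python
+
+    def active_intent_accuracy(pred_intents, gold_intents):
+        # fraction of user turns whose active intent was predicted correctly
+        return sum(p == g for p, g in zip(pred_intents, gold_intents)) / len(gold_intents)
+
+    def requested_slot_f1(pred_slots, gold_slots):
+        """pred_slots / gold_slots: one set of requested slot names per turn."""
+        f1_scores = []
+        for pred, gold in zip(pred_slots, gold_slots):
+            if not pred and not gold:
+                continue  # turns with no requested slots on either side are skipped
+            tp = len(pred & gold)
+            precision = tp / len(pred) if pred else 0.0
+            recall = tp / len(gold) if gold else 0.0
+            f1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0
+            f1_scores.append(f1)
+        return sum(f1_scores) / len(f1_scores) if f1_scores else 0.0
+
+    print(active_intent_accuracy(["FindRestaurants"], ["FindRestaurants"]))             # 1.0
+    print(requested_slot_f1([{"phone_number"}], [{"phone_number", "street_address"}]))  # 0.666...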
+ +Model Improvements +------------------ + +Model improvements added to get better performance results and increase model flexibility: + +- data augmentation +- system retrieval mechanism +- ability to make schema embeddings trainable during the model training + +Results on Single Domain +------------------------ + +Seen Services + ++-----------------------------------------------------------------------+-----------------+---------------+-----------+------------+ +| | Dev set | ++ +-----------------+---------------+-----------+------------+ +| SGD baseline implementations | Active Int Acc | Req Slot F1 | Aver GA | Joint GA | ++=======================================================================+=================+===============+===========+============+ +| Original SGD trained on single domain task | 99.06 | 98.67 | 88.08 | 68.58 | ++-----------------------------------------------------------------------+-----------------+---------------+-----------+------------+ +| NeMo's Implementation of the Baseline | 99.02 | 86.86 | 88.44 | 68.9 | ++-----------------------------------------------------------------------+-----------------+---------------+-----------+------------+ +| NeMo baseline + system retrieval | 98.97 | 86.87 | 92.70 | 81.52 | ++-----------------------------------------------------------------------+-----------------+---------------+-----------+------------+ +| NeMo baseline + system retrieval + attention head | 98.80 | 86.78 | 93.13 | 83.47 | ++-----------------------------------------------------------------------+-----------------+---------------+-----------+------------+ +| NeMo baseline + system retrieval + data augmentation | 98.74 | 87.56 | 93.3 | 82.81 | ++-----------------------------------------------------------------------+-----------------+---------------+-----------+------------+ +| NeMo baseline + system retrieval + attention head + data augmentation | 98.95 | 87.67 | 93.98 | 85.47 | ++-----------------------------------------------------------------------+-----------------+---------------+-----------+------------+ + + + +Unseen Services + ++-----------------------------------------------------------------------+-----------------+---------------+-----------+------------+ +| | Dev set | ++ +-----------------+---------------+-----------+------------+ +| SGD baseline implementations | Active Int Acc | Req Slot F1 | Aver GA | Joint GA | ++=======================================================================+=================+===============+===========+============+ +| Original SGD trained on single domain task | 94.8 | 93.6 | 66.03 | 28.05 | ++-----------------------------------------------------------------------+-----------------+---------------+-----------+------------+ +| NeMo's Implementation of the Baseline | 94.56 | 87.91 | 65.75 | 29.34 | ++-----------------------------------------------------------------------+-----------------+---------------+-----------+------------+ +| NeMo baseline + system retrieval | 94.22 | 87.99 | 67.18 | 30.565 | ++-----------------------------------------------------------------------+-----------------+---------------+-----------+------------+ +| NeMo baseline + system retrieval + attention head | 92.01 | 87.86 | 66.98 | 28.135 | ++-----------------------------------------------------------------------+-----------------+---------------+-----------+------------+ +| NeMo baseline + system retrieval + data augmentation | 91.34 | 88.51 | 66.20 | 29.46 | 
++-----------------------------------------------------------------------+-----------------+---------------+-----------+------------+ +| NeMo baseline + system retrieval + attention head + data augmentation | 92.83 | 88.34 | 70.8 | 30.728 | ++-----------------------------------------------------------------------+-----------------+---------------+-----------+------------+ + + + +All Services + ++-----------------------------------------------------------------------+-----------------+---------------+-----------+------------+ +| | Dev set | ++ +-----------------+---------------+-----------+------------+ +| SGD baseline implementations | Active Int Acc | Req Slot F1 | Aver GA | Joint GA | ++=======================================================================+=================+===============+===========+============+ +| Original SGD trained on single domain task | 96.6 | 96.5 | 77.6 | 48.6 | ++-----------------------------------------------------------------------+-----------------+---------------+-----------+------------+ +| NeMo's Implementation of the Baseline | 96.78 | 87.39 | 77.15 | 49.01 | ++-----------------------------------------------------------------------+-----------------+---------------+-----------+------------+ +| NeMo baseline + system retrieval | 96.59 | 87.44 | 80.01 | 55.91 | ++-----------------------------------------------------------------------+-----------------+---------------+-----------+------------+ +| NeMo baseline + system retrieval + attention head | 95.39 | 87.32 | 80.13 | 55.66 | ++-----------------------------------------------------------------------+-----------------+---------------+-----------+------------+ +| NeMo baseline + system retrieval + data augmentation | 95.05 | 88.04 | 79.82 | 55.99 | ++-----------------------------------------------------------------------+-----------------+---------------+-----------+------------+ +| NeMo baseline + system retrieval + attention head + data augmentation | 95.87 | 88.00 | 82.45 | 57.95 | ++-----------------------------------------------------------------------+-----------------+---------------+-----------+------------+ + + + + +.. note:: + This tutorial is based on the code from `examples/nlp/dialogue_state_tracking/dialogue_state_tracking_sgd.py `_ + + +References +---------- + +.. 
bibliography:: nlp_all_refs.bib + :style: plain + :labelprefix: NLP-SGD + :keyprefix: nlp-sgd- diff --git a/docs/sources/source/nlp/dst_multiwoz_example.png b/docs/sources/source/nlp/dst_multiwoz_example.png new file mode 100644 index 000000000000..6340335c3630 Binary files /dev/null and b/docs/sources/source/nlp/dst_multiwoz_example.png differ diff --git a/docs/sources/source/nlp/dst_sgd_schema_example.png b/docs/sources/source/nlp/dst_sgd_schema_example.png new file mode 100644 index 000000000000..b8e7c533a1f3 Binary files /dev/null and b/docs/sources/source/nlp/dst_sgd_schema_example.png differ diff --git a/docs/sources/source/nlp/dst_sgd_system_frame.png b/docs/sources/source/nlp/dst_sgd_system_frame.png new file mode 100644 index 000000000000..b4d269921417 Binary files /dev/null and b/docs/sources/source/nlp/dst_sgd_system_frame.png differ diff --git a/docs/sources/source/nlp/dst_sgd_user_frame.png b/docs/sources/source/nlp/dst_sgd_user_frame.png new file mode 100644 index 000000000000..92303db9065b Binary files /dev/null and b/docs/sources/source/nlp/dst_sgd_user_frame.png differ diff --git a/docs/sources/source/nlp/dst_trade_architecture.png b/docs/sources/source/nlp/dst_trade_architecture.png new file mode 100644 index 000000000000..cd42faacf60e Binary files /dev/null and b/docs/sources/source/nlp/dst_trade_architecture.png differ diff --git a/docs/sources/source/nlp/glue.rst b/docs/sources/source/nlp/glue.rst new file mode 100644 index 000000000000..b2dd1543e713 --- /dev/null +++ b/docs/sources/source/nlp/glue.rst @@ -0,0 +1,187 @@ + +Tutorial +======== + +In this tutorial, we are going to describe how to finetune a BERT-like model based on `BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding `_ :cite:`nlp-glue-devlin2018bert` on `GLUE: A Multi-Task Benchmark and Analysis Platform for Natural Language Understanding `_ :cite:`nlp-glue-wang2018glue`. +The code used in this tutorial is based on ``examples/nlp/glue_benchmark/glue_benchmark_with_bert.py``. + +GLUE tasks description +---------------------- + +GLUE Benchmark includes 9 natural language understanding tasks: + +Single-Sentence Tasks: + +- **CoLA** The Corpus of Linguistic Acceptability :cite:`nlp-glue-warstadt2018neural` is a set of English sentences from published linguistics literature. The task is to predict whether a given sentence is grammatically correct or not. +- **SST-2** The Stanford Sentiment Treebank :cite:`nlp-glue-socher2013recursive` consists of sentences from movie reviews and human annotations of their sentiment. The task is to predict the sentiment of a given sentence: positive or negative. + + +Similarity and Paraphrase tasks: + +- **MRPC** The Microsoft Research Paraphrase Corpus :cite:`nlp-glue-dolan-brockett-2005-automatically` is a corpus of sentence pairs automatically extracted from online news sources, with human annotations for whether the sentences in the pair are semantically equivalent. +- **QQP** `The Quora Question Pairs2 `_ dataset is a collection of question pairs from the community question-answering website Quora. The task is to determine whether a pair of questions are semantically equivalent. +- **STS-B** The Semantic Textual Similarity Benchmark :cite:`nlp-glue-cer2017semeval` is a collection of sentence pairs drawn from news headlines, video, and image captions, and natural language inference data. The task is to determine how similar two sentences are. 
+ + +Inference Tasks: + +- **MNLI** The Multi-Genre Natural Language Inference Corpus :cite:`nlp-glue-williams2017broad` is a crowdsourced collection of sentence pairs with textual entailment annotations. Given a premise sentence and a hypothesis sentence, the task is to predict whether the premise entails the hypothesis (entailment), contradicts the hypothesis (contradiction), or neither (neutral). The task has the matched (in-domain) and mismatched (cross-domain) sections. +- **QNLI** The Stanford Question Answering Dataset (:cite:`nlp-glue-rajpurkar2016squad`) is a question-answering dataset consisting of question-paragraph pairs, where one of the sentences in the paragraph (drawn from Wikipedia) contains the answer to the corresponding question. The task is to determine whether the context sentence contains the answer to the question. +- **RTE** The Recognizing Textual Entailment (RTE) datasets come from a series of annual textual entailment challenges. The task is to determine whether the second sentence is entailed by the first one or not. +- **WNLI** The Winograd Schema Challenge :cite:`nlp-glue-levesque2012winograd` is a reading comprehension task in which a system must read a sentence with a pronoun and select the referent of that pronoun from a list of choices. + +All the tasks are classification tasks, except for the STS-B task which is a regression task. +All classification tasks are 2-class tasks, except for the MNLI task which is a 3-class task. + +More details about the GLUE benchmark can be found `here `_. + +Training the model +------------------ +Before running ``examples/nlp/glue_benchmark/glue_benchmark_with_bert.py``, download the GLUE data with `this script `_ by running: + +.. code-block:: bash + + # download the script to get the GLUE data + wget https://gist.githubusercontent.com/W4ngatang/60c2bdb54d156a41194446737ce03e2e/raw/17b8dd0d724281ed7c3b2aeeda662b92809aadd5/download_glue_data.py + # run the script to download the GLUE data + python download_glue_data.py + +After running the above commands, you will have a folder ``glue_data`` with data folders for every GLUE task. For example, data for the MRPC task will be under ``glue_data/MRPC``. + +The GLUE tasks can be fine-tuned with 4 pre-trained back-bone models supported in NeMo: Megatron-LM BERT, BERT, ALBERT and RoBERTa. +See the list of available pre-trained Huggingface models `here `__. +To get the list of all NeMo-supported pre-trained models, run: + +.. code-block:: python + + import nemo.collections.nlp as nemo_nlp + nemo_nlp.nm.trainables.get_pretrained_lm_models_list() + +Specify the model to use for training with ``--pretrained_model_name``. + +.. note:: + It's recommended to finetune the model on each task separately. + Also, based on `GLUE Benchmark FAQ#12 `_, + there might be some differences in dev/test distributions for the QQP task + and in train/dev for the WNLI task. + +Model training +-------------- +Use the ``--task_name`` argument to run the training script on a specific task, using the lower-cased task name: ``cola, sst-2, mrpc, sts-b, qqp, mnli, qnli, rte, wnli``. + +To run the script on the MRPC task on a single GPU, run: + + .. code-block:: bash + + python glue_benchmark_with_bert.py \ + --data_dir /path_to_data_dir/MRPC \ + --task_name mrpc \ + --work_dir /path_to_output_folder \ + --pretrained_model_name bert-base-uncased + + +To use multi-gpu training on the MNLI task, run: + + ..
code-block:: bash + + export NUM_GPUS=4 + python -m torch.distributed.launch --nproc_per_node=$NUM_GPUS glue_benchmark_with_bert.py \ + --data_dir=/path_to_data/MNLI \ + --task_name mnli \ + --work_dir /path_to_output_folder \ + --num_gpus=$NUM_GPUS \ + --pretrained_model_name bert-base-uncased + +More details about multi-gpu training can be found in the `Fast Training `_ section. + +For additional model training parameters, please see ``examples/nlp/glue_benchmark/glue_benchmark_with_bert.py``. + +Model results +------------- + +Results after finetuning on each task (averaged over 3 runs) using different pre-trained models: + + .. code-block:: bash + + # to reproduce BERT base paper results + --pretrained_model_name bert-base-uncased + + # Albert-large + --pretrained_model_name albert-large-v2 + + # Albert-xlarge + --pretrained_model_name albert-xlarge-v2 + ++-------+------------------------------+--------------+---------------+----------------+ +| Task | Metric | Albert-large | Albert-xlarge | BERT base paper| ++=======+==============================+==============+===============+================+ +| CoLA | Matthew's correlation | 54.94 | 61.72 | 52.1 | ++-------+------------------------------+--------------+---------------+----------------+ +| SST-2 | Accuracy | 92.74 | 91.86 | 93.5 | ++-------+------------------------------+--------------+---------------+----------------+ +| MRPC | F1/Accuracy | 92.05/88.97 | 91.87/88.61 | 88.9/- | ++-------+------------------------------+--------------+---------------+----------------+ +| STS-B | Pearson/Spearman corr. | 90.41/90.21 | 90.07/90.10 | 85.8 | ++-------+------------------------------+--------------+---------------+----------------+ +| QQP | F1/Accuracy | 88.26/91.26 | 88.80/91.65 | 71.2/- | ++-------+------------------------------+--------------+---------------+----------------+ +| MNLI | Matched acc./Mismatched acc. | 86.69/86.81 | 88.66/88.73 | 84.6/83.4 | ++-------+------------------------------+--------------+---------------+----------------+ +| QNLI | Accuracy | 92.68 | 93.66 | 90.5 | ++-------+------------------------------+--------------+---------------+----------------+ +| RTE | Accuracy | 80.87 | 82.86 | 66.4 | ++-------+------------------------------+--------------+---------------+----------------+ + +The WNLI task was excluded from the experiments due to the problematic WNLI dataset. +The dev sets were used for evaluation of the Albert models, and the test sets for the BERT-base paper results from :cite:`nlp-glue-devlin2018bert`. + +The hyperparameters used to get the results in the above table can be found in the table below. +Each cell in the table lists the following parameters: +Number of GPUs used / Batch Size / Learning Rate / Number of Epochs. For parameters not specified here, please refer to the default values in the training script.
+ ++-------+--------------+---------------+ +| Task | Albert-large | Albert-xlarge | ++=======+==============+===============+ +| CoLA | 1/32/1e-5/3 | 1/32/1e-5/10 | ++-------+--------------+---------------+ +| SST-2 | 4/16/2e-5/5 | 4/16/2e-5/12 | ++-------+--------------+---------------+ +| MRPC | 1/32/1e-5/5 | 1/16/2e-5/5 | ++-------+--------------+---------------+ +| STS-B | 1/16/2e-5/5 | 1/16/4e-5/12 | ++-------+--------------+---------------+ +| QQP | 1/16/2e-5/5 | 4/16/1e-5/12 | ++-------+--------------+---------------+ +| MNLI | 4/64/1e-5/5 | 4/32/1e-5/5 | ++-------+--------------+---------------+ +| QNLI | 4/16/1e-5/5 | 4/16/1e-5/5 | ++-------+--------------+---------------+ +| RTE | 1/16/1e-5/5 | 1/16/1e-5/12 | ++-------+--------------+---------------+ + +Evaluating Checkpoints +---------------------- + +During training, the model is evaluated after every epoch and by default a folder named "checkpoints" is created under the working folder specified by `--work_dir` and \ +checkpoints are stored there. To evaluate a pre-trained checkpoint on a dev set, \ +run the same training script, passing `--checkpoint_dir` and setting `--num_epochs` to zero to skip training. +For example, to evaluate a checkpoint trained on the MRPC task, run: + +.. code-block:: bash + + cd examples/nlp/glue_benchmark + python glue_benchmark_with_bert.py \ + --data_dir /path_to_data_dir/MRPC \ + --task_name mrpc \ + --work_dir /path_to_output_folder \ + --pretrained_model_name bert-base-uncased \ + --checkpoint_dir /path_to_output_folder/checkpoints \ + --num_epochs 0 + +References +---------- + +.. bibliography:: nlp_all_refs.bib + :style: plain + :labelprefix: NLP-GLUE + :keyprefix: nlp-glue- \ No newline at end of file diff --git a/docs/sources/source/nlp/intro.rst b/docs/sources/source/nlp/intro.rst index 6cc04c641dd1..4b3dfe071645 100644 --- a/docs/sources/source/nlp/intro.rst +++ b/docs/sources/source/nlp/intro.rst @@ -3,6 +3,21 @@ Natural Language Processing =========================== +Supported Tasks and Models: + +* Intent Detection and Slot Filling +* Text Classification +* State Tracking for Task-oriented Dialogue Systems +* Language Modelling +* Neural Machine Translation +* Question Answering +* Named Entity Recognition (NER) +* Punctuation and Capitalization +* GLUE Benchmark +* ASR Postprocessing with BERT + +All examples from the NLP collection can be found `here `__. + Neural Machine Translation (NMT) -------------------------------- .. toctree:: @@ -10,13 +25,19 @@ Neural Machine Translation (NMT) neural_machine_translation -BERT ---- +Pretraining BERT +----------------- .. toctree:: :maxdepth: 8 bert_pretraining +Megatron-LM for Downstream Tasks +-------------------------------- +.. toctree:: + :maxdepth: 8 + + megatron_finetuning Transformer Language Model -------------------------- @@ -25,6 +46,21 @@ Transformer Language Model transformer_language_model +GLUE Benchmark +-------------------------- +.. toctree:: + :maxdepth: 8 + + glue + +Dialogue State Tracking +----------------------- + +.. toctree:: + :maxdepth: 8 + + dialogue_state_tracking.rst + Named Entity Recognition ------------------------ @@ -34,7 +70,6 @@ Named Entity Recognition ner - Punctuation and Word Capitalization ----------------------------------- @@ -54,13 +89,13 @@ Intent and Slot filling Question Answering ------------------------ +------------------ ..
toctree:: :maxdepth: 8 question_answering -Improving speech recognition with BERTx2 post-processing model +Improving Speech Recognition with BERTx2 Post-processing Model -------------------------------------------------------------- .. toctree:: :maxdepth: 8 diff --git a/docs/sources/source/nlp/joint_intent_slot_filling.rst b/docs/sources/source/nlp/joint_intent_slot_filling.rst index 57b82629b0be..72d5f002b234 100644 --- a/docs/sources/source/nlp/joint_intent_slot_filling.rst +++ b/docs/sources/source/nlp/joint_intent_slot_filling.rst @@ -1,17 +1,18 @@ Tutorial ======== -In this tutorial, we are going to implement a joint intent and slot filling system with pretrained BERT model based on -`BERT for Joint Intent Classification and Slot Filling `_ :cite:`nlp-slot-chen2019bert`. -All code used in this tutorial is based on ``examples/nlp/joint_intent_slot_with_bert.py``. +In this tutorial, we are going to walk through the structure of our example for training and evaluating an intent detection and slot filling model with a pretrained BERT model. \ +This model is based on the model proposed in `BERT for Joint Intent Classification and Slot Filling `_ :cite:`nlp-slot-chen2019bert`. +All the code introduced in this tutorial is based on ``examples/nlp/intent_detection_slot_tagging/joint_intent_slot_with_bert.py``. -There are four pre-trained BERT models that we can select from using the argument `--pretrained_bert_model`. We're currently -using the script for loading pre-trained models from `pytorch_transformers`. See the list of available pre-trained models -`here `__. +There are a variety of pre-trained BERT models that we can select as the base encoder for our model. We're currently +using the script for loading pre-trained models from `transformers`. \ +See the list of available pre-trained models by calling `nemo_nlp.nm.trainables.get_pretrained_lm_models_list()`. \ +The type of the encoder can be set with the argument `--pretrained_model_name`. .. tip:: - For pretraining BERT in NeMo and pretrained model checkpoints go to `BERT pretraining `__. + For pretraining a BERT model in NeMo, and for downloading pretrained model checkpoints, go to `BERT pretraining `__. Preliminaries ------------- @@ -28,20 +29,31 @@ When `intent_loss_weight = 0.5`, this loss jointly maximizes: with x being the sequence of n tokens (x1, x2, ..., xn), y being the predicted intent for x, and s1, s2, ..., sn being the predicted slots corresponding to x1, x2, ..., xn. -**Datasets.** +**Datasets.** -This model can work with any dataset that follows the format: +This model can work with any dataset that follows NeMo's format: * input file: a `tsv` file with the first line as a header [sentence][tab][label] - * slot file: slot labels for all tokens in the sentence, separated by space. The length of the slot labels should be the same as the length of all tokens in sentence in input file. +Datasets which are not in this format should be processed and converted into NeMo's format. \ Currently, the datasets that we provide pre-processing scripts for include ATIS which can be downloaded from `Kaggle `_ and the SNIPS spoken language understanding research dataset which can be -requested from `here `__. -You can find the pre-processing script in ``collections/nemo_nlp/nemo_nlp/data/datasets/utils.py``. +requested from `here `__. \ + +You may use the ``examples/nlp/intent_detection_slot_tagging/data/import_datasets.py`` script to process these datasets: + + ..
code-block:: python + + cd examples/nlp/intent_detection_slot_tagging/data/ + python import_datasets.py \ + --dataset_name + --source_data_dir \ + --target_data_dir + +By setting the dataset_name parameter to one of ['atis', 'snips'], you can process and convert these datasets into NeMo's format. you can also write your own preprocessing scripts for any dataset. -Code structure +Code Structure -------------- First, we instantiate Neural Module Factory which defines 1) backend (PyTorch or TensorFlow), 2) mixed precision optimization level, @@ -49,77 +61,73 @@ First, we instantiate Neural Module Factory which defines 1) backend (PyTorch or .. code-block:: python - nf = nemo.core.NeuralModuleFactory(backend=nemo.core.Backend.PyTorch, - local_rank=args.local_rank, - optimization_level=args.amp_opt_level, - log_dir=work_dir, - create_tb_writer=True, - files_to_copy=[__file__], - add_time_to_log_dir=True) + nf = nemo.core.NeuralModuleFactory( + backend=nemo.core.Backend.PyTorch, + local_rank=args.local_rank, + optimization_level=args.amp_opt_level, + log_dir=args.work_dir, + checkpoint_dir=args.checkpoint_dir, + create_tb_writer=True, + files_to_copy=[__file__], + add_time_to_log_dir=True, + ) -We define the tokenizer which transforms text into BERT tokens, using a built-in tokenizer by `pytorch_transformers`. -This will tokenize text following the mapping of the original BERT model. +We define the tokenizer which transforms text into BERT tokens, using a built-in tokenizer by `transformers`. \ +NemoBertTokenizer would select and return the appropriate tokenizer for each model. .. code-block:: python - from transformers import BertTokenizer - hidden_size = pretrained_bert_model.hidden_size - tokenizer = BertTokenizer.from_pretrained(args.pretrained_bert_model) + tokenizer = nemo_nlp.data.NemoBertTokenizer(pretrained_model=args.pretrained_model_name) Next, we define all Neural Modules participating in our joint intent slot filling classification pipeline. - * Process data: the `JointIntentSlotDataDesc` class in `nemo_nlp/nemo_nlp/data/datasets/utils.py` is supposed to do the preprocessing of raw data into the format data supported by `BertJointIntentSlotDataset`. Currently, it supports SNIPS and ATIS raw datasets, but you can also write your own preprocessing scripts for any dataset. + * Build data description: the `JointIntentSlotDataDesc` class in `nemo/collections/nlp/data/datasets/joint_intent_slot_dataset/data_descriptor.py` is supposed to do the read the dataset and build its schema. .. code-block:: python - data_desc = JointIntentSlotDataDesc(args.data_dir, - args.do_lower_case, - args.dataset_name, - args.none_slot_label, - args.pad_label) + from nemo.collections.nlp.data.datasets.joint_intent_slot_dataset import JointIntentSlotDataDesc + data_desc = JointIntentSlotDataDesc( + data_dir=args.data_dir, none_slot_label=args.none_slot_label, pad_label=args.pad_label + ) + - * Load the pretrained model and get the hidden states for the corresponding inputs. + * Load the pre-trained BERT model to encode the corresponding inputs. .. code-block:: python - pretrained_bert_model = nemo_nlp.huggingface.BERT( - pretrained_model_name=args.pretrained_bert_model - ) - hidden_states = pretrained_bert_model(input_ids=ids, - token_type_ids=type_ids, - attention_mask=input_mask) + bert_model = nemo_nlp.nm.trainables.get_pretrained_lm_model( + pretrained_model_name=args.pretrained_model_name) * Create the classifier heads for our task. .. 
code-block:: python - classifier = nemo_nlp.JointIntentSlotClassifier( - hidden_size=hidden_size, - num_intents=num_intents, - num_slots=num_slots, - dropout=args.fc_dropout) + from nemo.collections.nlp.nm.trainables import JointIntentSlotClassifier + classifier = JointIntentSlotClassifier( + hidden_size=hidden_size, num_intents=data_desc.num_intents, num_slots=data_desc.num_slots, dropout=args.fc_dropout + ) - * Create loss function + * Create loss functions for intent detection and slot filling then and use loss aggregator module to merge them .. code-block:: python - loss_fn = nemo_nlp.JointIntentSlotLoss(num_slots=data_desc.num_slots) + from nemo.backends.pytorch.common.losses import CrossEntropyLossNM, LossAggregatorNM + intent_loss_fn = CrossEntropyLossNM(logits_ndim=2) + slot_loss_fn = CrossEntropyLossNM(logits_ndim=3) + total_loss_fn = LossAggregatorNM(num_inputs=2, weights=[args.intent_loss_weight, 1.0 - args.intent_loss_weight]) - * Create the pipelines for the train and evaluation processes. Each pipeline creates its own data layer (BertJointIntentSlotDataLayer). DataLayer is an extra layer to do the semantic checking for your dataset and convert it into DataLayerNM. You have to define `input_ports` and `output_ports`. + * Create the pipelines for the train and evaluation processes. Each pipeline creates its own data layer (BertJointIntentSlotDataLayer). .. code-block:: python - def create_pipeline(num_samples=-1, - batch_size=32, - num_gpus=1, - local_rank=0, - mode='train'): - logging.info(f"Loading {mode} data...") - data_file = f'{data_desc.data_dir}/{mode}.tsv' - slot_file = f'{data_desc.data_dir}/{mode}_slots.tsv' - shuffle = args.shuffle_data if mode == 'train' else False - - data_layer = nemo_nlp.BertJointIntentSlotDataLayer( + from nemo.collections.nlp.nm.data_layers import BertJointIntentSlotDataLayer + def create_pipeline(num_samples=-1, batch_size=32, data_prefix='train', is_training=True, num_gpus=1): + logging.info(f"Loading {data_prefix} data...") + data_file = f'{data_desc.data_dir}/{data_prefix}.tsv' + slot_file = f'{data_desc.data_dir}/{data_prefix}_slots.tsv' + shuffle = args.shuffle_data if is_training else False + + data_layer = BertJointIntentSlotDataLayer( input_file=data_file, slot_file=slot_file, pad_label=data_desc.pad_label, @@ -128,14 +136,12 @@ Next, we define all Neural Modules participating in our joint intent slot fillin num_samples=num_samples, shuffle=shuffle, batch_size=batch_size, - num_workers=0, - local_rank=local_rank, ignore_extra_tokens=args.ignore_extra_tokens, - ignore_start_end=args.ignore_start_end - ) + ignore_start_end=args.ignore_start_end, + do_lower_case=args.do_lower_case, + ) - ids, type_ids, input_mask, loss_mask, \ - subtokens_mask, intents, slots = data_layer() + input_data = data_layer() data_size = len(data_layer) logging.info(f'The length of data layer is {data_size}') @@ -148,117 +154,131 @@ Next, we define all Neural Modules participating in our joint intent slot fillin steps_per_epoch = math.ceil(data_size / (batch_size * num_gpus)) logging.info(f"Steps_per_epoch = {steps_per_epoch}") - hidden_states = pretrained_bert_model(input_ids=ids, - token_type_ids=type_ids, - attention_mask=input_mask) + hidden_states = pretrained_bert_model( + input_ids=input_data.input_ids, token_type_ids=input_data.input_type_ids, attention_mask=input_data.input_mask + ) intent_logits, slot_logits = classifier(hidden_states=hidden_states) - loss = loss_fn(intent_logits=intent_logits, - slot_logits=slot_logits, - loss_mask=loss_mask, - 
intents=intents, - slots=slots) + intent_loss = intent_loss_fn(logits=intent_logits, labels=input_data.intents) + slot_loss = slot_loss_fn(logits=slot_logits, labels=input_data.slots, loss_mask=input_data.loss_mask) + total_loss = total_loss_fn(loss_1=intent_loss, loss_2=slot_loss) - if mode == 'train': - tensors_to_evaluate = [loss, intent_logits, slot_logits] + if is_training: + tensors_to_evaluate = [total_loss, intent_logits, slot_logits] else: - tensors_to_evaluate = [intent_logits, slot_logits, intents, - slots, subtokens_mask] + tensors_to_evaluate = [ + intent_logits, + slot_logits, + input_data.intents, + input_data.slots, + input_data.subtokens_mask, + ] - return tensors_to_evaluate, loss, steps_per_epoch, data_layer + return tensors_to_evaluate, total_loss, steps_per_epoch, data_layer - train_tensors, train_loss, steps_per_epoch, _ = create_pipeline( - args.num_train_samples, + train_tensors, train_loss, train_steps_per_epoch, _ = create_pipeline( + num_samples=args.num_train_samples, batch_size=args.batch_size, + data_prefix=args.train_file_prefix, + is_training=True, num_gpus=args.num_gpus, - local_rank=args.local_rank, - mode=args.train_file_prefix) - eval_tensors, _, _, data_layer = create_pipeline( - args.num_eval_samples, + ) + eval_tensors, _, _, eval_data_layer = create_pipeline( + num_samples=args.num_eval_samples, batch_size=args.batch_size, + data_prefix=args.eval_file_prefix, + is_training=False, num_gpus=args.num_gpus, - local_rank=args.local_rank, - mode=args.eval_file_prefix) + ) * Create relevant callbacks for saving checkpoints, printing training progresses and evaluating results. .. code-block:: python - train_callback = nemo.core.SimpleLossLoggerCallback( + from nemo.collections.nlp.callbacks.joint_intent_slot_callback import eval_epochs_done_callback, eval_iter_callback + from nemo.core import CheckpointCallback, SimpleLossLoggerCallback + train_callback = SimpleLossLoggerCallback( tensors=train_tensors, - print_func=lambda x: str(np.round(x[0].item(), 3)), + print_func=lambda x: logging.info(str(round(x[0].item(), 3))), tb_writer=nf.tb_writer, get_tb_values=lambda x: [["loss", x[0]]], - step_freq=steps_per_epoch) + step_freq=steps_per_epoch, + ) eval_callback = nemo.core.EvaluatorCallback( eval_tensors=eval_tensors, - user_iter_callback=lambda x, y: eval_iter_callback( - x, y, data_layer), + user_iter_callback=lambda x, y: eval_iter_callback(x, y), user_epochs_done_callback=lambda x: eval_epochs_done_callback( - x, f'{nf.work_dir}/graphs'), + x, + intents_label_ids=data_desc.intents_label_ids, + slots_label_ids=data_desc.slots_label_ids, + graph_fold=f'{nf.work_dir}/graphs', + normalize_cm=True + ), tb_writer=nf.tb_writer, - eval_step=steps_per_epoch) + eval_step=train_steps_per_epoch, + ) - ckpt_callback = nemo.core.CheckpointCallback( - folder=nf.checkpoint_dir, - epoch_freq=args.save_epoch_freq, - step_freq=args.save_step_freq) + ckpt_callback = CheckpointCallback( + folder=nf.checkpoint_dir, epoch_freq=args.save_epoch_freq, step_freq=args.save_step_freq + ) * Finally, we define the optimization parameters and run the whole pipeline. .. 
code-block:: python - lr_policy_fn = get_lr_policy(args.lr_policy, - total_steps=args.num_epochs * steps_per_epoch, - warmup_ratio=args.lr_warmup_proportion) + from nemo.utils.lr_policies import get_lr_policy + lr_policy_fn = get_lr_policy( + args.lr_policy, total_steps=args.num_epochs * steps_per_epoch, warmup_ratio=args.lr_warmup_proportion + ) - nf.train(tensors_to_optimize=[train_loss], - callbacks=[train_callback, eval_callback, ckpt_callback], - lr_policy=lr_policy_fn, - optimizer=args.optimizer_kind, - optimization_params={"num_epochs": args.num_epochs, - "lr": args.lr, - "weight_decay": args.weight_decay}) + nf.train( + tensors_to_optimize=[train_loss], + callbacks=[train_callback, eval_callback, ckpt_callback], + lr_policy=lr_policy_fn, + optimizer=args.optimizer_kind, + optimization_params={"num_epochs": args.num_epochs, "lr": args.lr, "weight_decay": args.weight_decay}, + ) -Model training +Model Training -------------- -To train a joint intent slot filling model, run ``joint_intent_slot_with_bert.py`` located at ``nemo/examples/nlp``: +To train an intent detection and slot filling model on a dataset, run ``joint_intent_slot_with_bert.py`` located at ``examples/nlp/intent_detection_slot_tagging/joint_intent_slot_with_bert.py``: .. code-block:: python - python -m torch.distributed.launch --nproc_per_node=2 joint_intent_slot_with_bert.py \ - --data_dir - --work_dir \ - --max_seq_length \ - --optimizer_kind - ... + cd examples/nlp/intent_detection_slot_tagging/ + python joint_intent_slot_with_bert.py \ + --data_dir \ + --work_dir \ -To do inference, run: +By default a folder named "checkpoints" would get created under the working folder specified by `--work_dir` and checkpoints are stored under it. +To do inference with a checkpoint on test set, you may run: .. code-block:: python + cd examples/nlp/intent_detection_slot_tagging/ python joint_intent_slot_infer.py \ --data_dir \ - --work_dir - + --checkpoint_dir \ + --eval_file_prefix test To do inference on a single query, run: - + .. code-block:: python + cd examples/nlp/intent_detection_slot_tagging/ python joint_intent_slot_infer.py \ - --work_dir + --checkpoint_dir --query References ---------- -.. bibliography:: nlp_all.bib +.. bibliography:: nlp_all_refs.bib :style: plain :labelprefix: NLP-SLOT :keyprefix: nlp-slot- diff --git a/docs/sources/source/nlp/megatron_finetuning.rst b/docs/sources/source/nlp/megatron_finetuning.rst new file mode 100644 index 000000000000..64a13c8e188f --- /dev/null +++ b/docs/sources/source/nlp/megatron_finetuning.rst @@ -0,0 +1,46 @@ +Megatron-LM for Downstream Tasks +================================ + +Megatron :cite:`nlp-megatron-lm-shoeybi2020megatron` is a large, powerful transformer developed by the Applied Deep Learning Research team at NVIDIA. +More details could be found in `Megatron-LM github repo `_. + +Requirements +------------ + +To use Megatron-LM models, please install NVIDIA APEX `from here `_. +We strongly recommend using one of NGC's recent PyTorch containers (has APEX pre-installed) or NeMo docker container with all dependencies pre-installed, \ +more details could be found in the `Getting Started section `_. + +Fine-tuning +----------- + +In order to finetune a pretrained Megatron BERT language model on NLP downstream tasks from `examples/nlp `_, specify the pretrained_model_name like this: + +.. code-block:: bash + + --pretrained_model_name megatron-bert-345m-uncased + +For example, to finetune SQuAD v1.1 with Megatron-LM, run: + +.. 
code-block:: bash + + python question_answering_squad.py \ + --train_file PATH_TO_DATA_DIR/squad/v1.1/train-v1.1.json \ + --eval_file PATH_TO_DATA_DIR/squad/v1.1/dev-v1.1.json \ + --pretrained_model_name megatron-bert-345m-uncased + + +If you have a different checkpoint or model configuration, use ``--pretrained_model_name megatron-bert-uncased`` or ``--pretrained_model_name megatron-bert-cased`` and specify ``--bert_config`` and ``--bert_checkpoint`` for your model. + +.. note:: + Megatron-LM has its own set of training arguments (including tokenizer) that are ignored during finetuning in NeMo. Please use downstream task training scripts for all NeMo supported arguments. + + + +References +---------- + +.. bibliography:: nlp_all_refs.bib + :style: plain + :labelprefix: NLP-MEGATRON-LM + :keyprefix: nlp-megatron-lm- \ No newline at end of file diff --git a/docs/sources/source/nlp/ner.rst b/docs/sources/source/nlp/ner.rst index 8a44f3c48cb2..2a0b897e079d 100644 --- a/docs/sources/source/nlp/ner.rst +++ b/docs/sources/source/nlp/ner.rst @@ -9,12 +9,19 @@ tutorial. See the :ref:`installation` section for more details. For pretraining BERT in NeMo and pretrained model checkpoints go to `BERT pretraining `__. +.. _ner_tutorial: Introduction ------------ This tutorial explains how to implement named entity recognition (NER) in NeMo. We'll show how to do this with a pre-trained BERT model, or with one that you trained yourself! For more details, check out our BERT pretraining tutorial. +.. tip:: + + We recommend you try this out in a Jupyter notebook. It'll make debugging much easier! + See examples/nlp/token_classification/NERWithBERT.ipynb. + All code used in this tutorial is based on :ref:`ner_scripts`. + Download Dataset ---------------- @@ -41,22 +48,17 @@ Each line of the text.txt file contains text sequences, where words are separate You can use `this`_ to convert CoNLL-2003 dataset to the format required for training. -.. _this: https://github.com/NVIDIA/NeMo/blob/master/scripts/convert_iob_format_to_token_classification_format.py +.. _this: https://github.com/NVIDIA/NeMo/tree/master/examples/nlp/token_classification/import_from_iob_format.py Training -------- -.. tip:: - - We recommend you try this out in a Jupyter notebook. It'll make debugging much easier! - See examples/nlp/NERWithBERT.ipynb - First, we need to create our neural factory with the supported backend. How you should define it depends on whether you'd like to multi-GPU or mixed-precision training. This tutorial assumes that you're training on one GPU, without mixed precision (``optimization_level="O0"``). If you want to use mixed precision, set ``optimization_level`` to ``O1`` or ``O2``. .. code-block:: python - WORK_DIR = "output_ner" + WORK_DIR = "path_to_output_dir" nf = nemo.core.NeuralModuleFactory(backend=nemo.core.Backend.PyTorch, local_rank=None, optimization_level="O0", @@ -65,20 +67,23 @@ First, we need to create our neural factory with the supported backend. How you Next, we'll need to define our tokenizer and our BERT model. There are a couple of different ways you can do this. Keep in mind that NER benefits from casing ("New York City" is easier to identify than "new york city"), so we recommend you use cased models. -If you're using a standard BERT model, you should do it as follows. To see the full list of BERT model names, check out ``nemo_nlp.huggingface.BERT.list_pretrained_models()`` +If you're using a standard BERT model, you should do it as follows. 
To see the full list of BERT model names, check out ``nemo_nlp.nm.trainables.get_pretrained_lm_models_list()`` .. code-block:: python - tokenizer = NemoBertTokenizer(pretrained_model="bert-base-cased") - bert_model = nemo_nlp.huggingface.BERT( + bert_model = nemo_nlp.nm.trainables.get_pretrained_lm_model( pretrained_model_name="bert-base-cased") -See examples/nlp/token_classification.py on how to use a BERT model that you pre-trained yourself. + tokenizer = nemo.collections.nlp.data.tokenizers.get_tokenizer( + tokenizer_name="nemobert", + pretrained_model_name="bert-base-cased") + +See examples/nlp/token_classification/token_classification.py on how to use a BERT model that you pre-trained yourself. Now, create the train and evaluation data layers: .. code-block:: python - train_data_layer = nemo_nlp.BertTokenClassificationDataLayer( + train_data_layer = nemo_nlp.nm.data_layers.BertTokenClassificationDataLayer( tokenizer=tokenizer, text_file=os.path.join(DATA_DIR, 'text_train.txt'), label_file=os.path.join(DATA_DIR, 'labels_train.txt'), @@ -88,7 +93,7 @@ Now, create the train and evaluation data layers: label_ids = train_data_layer.dataset.label_ids num_classes = len(label_ids) - eval_data_layer = nemo_nlp.BertTokenClassificationDataLayer( + eval_data_layer = nemo_nlp.nm.data_layers.BertTokenClassificationDataLayer( tokenizer=tokenizer, text_file=os.path.join(DATA_DIR, 'text_dev.txt'), label_file=os.path.join(DATA_DIR, 'labels_dev.txt'), @@ -101,13 +106,11 @@ We need to create the classifier to sit on top of the pretrained model and defin .. code-block:: python hidden_size = bert_model.hidden_size - ner_classifier = nemo_nlp.TokenClassifier(hidden_size=hidden_size, + ner_classifier = nemo.collections.nlp.nm.trainables.TokenClassifier(hidden_size=hidden_size, num_classes=num_classes, dropout=CLASSIFICATION_DROPOUT) - ner_loss = nemo_nlp.TokenClassificationLoss(d_model=hidden_size, - num_classes=num_classes, - dropout=CLASSIFICATION_DROPOUT) + ner_loss = CrossEntropyLossNM(logits_ndim=3) Now, create the train and evaluation datasets: @@ -188,21 +191,23 @@ Finally, we will define our learning rate policy and our optimizer, and start tr .. _Tensorboard: https://www.tensorflow.org/tensorboard .. _tensorboardX: https://github.com/lanpa/tensorboardX -To train NER with BERT using the provided scripts -------------------------------------------------- +.. _ner_scripts: + +Training and inference scripts +------------------------------ To run the provided training script: .. code-block:: bash - python token_classification.py --data_dir /data/ner/ --work_dir output_ner + python examples/nlp/token_classification/token_classification.py --data_dir path_to_data --work_dir path_to_output_dir To run inference: .. code-block:: bash - python token_classification_infer.py --labels_dict /data/ner/label_ids.csv - --work_dir output_ner/checkpoints/ + python examples/nlp/token_classification/token_classification_infer.py --labels_dict path_to_data/label_ids.csv + --checkpoint_dir path_to_output_dir/checkpoints/ Note, label_ids.csv file will be generated during training and stored in the data_dir folder. @@ -227,5 +232,5 @@ And then, when you load your BERT model, you should specify the name of the dire .. 
code-block:: python tokenizer = NemoBertTokenizer(pretrained_model="scibert_scivocab_cased") - bert_model = nemo_nlp.huggingface.BERT( + bert_model = nemo_nlp.nm.trainables.huggingface.BERT( pretrained_model_name="scibert_scivocab_cased") diff --git a/docs/sources/source/nlp/neural_machine_translation.rst b/docs/sources/source/nlp/neural_machine_translation.rst index c62744082d66..db39880f43f8 100644 --- a/docs/sources/source/nlp/neural_machine_translation.rst +++ b/docs/sources/source/nlp/neural_machine_translation.rst @@ -3,7 +3,7 @@ Tutorial In this tutorial we are going to implement Neural Machine Translation (NMT) system based on `Transformer encoder-decoder architecture `_ :cite:`nlp-nmt-vaswani2017attention`. -All code used in this tutorial is based on ``examples/nlp/machine_translation/machine_translation_tutorial.py``. +All code used in this tutorial is based on ``examples/nlp/neural_machine_translation/machine_translation_tutorial.py``. Preliminaries ------------- @@ -19,7 +19,7 @@ To clean the dataset we remove all sentence pairs such that: We use newstest2013 for development and newstest2014 for testing. All datasets, as well as the tokenizer model can be downloaded from `here `__. In the following steps, we assume that all data is located at ****. -**Resources.** Training script ``examples/nlp/machine_translation/machine_translation_tutorial.py`` used in this tutorial allows to train Transformer-big architecture +**Resources.** Training script ``examples/nlp/neural_machine_translation/machine_translation_tutorial.py`` used in this tutorial allows to train Transformer-big architecture to **29.2** BLEU / **28.5** SacreBLEU on newstest2014 in approximately 15 hours on NVIDIA's DGX-1 with 16GB Volta GPUs. This setup can also be replicated with fewer resources by using more steps of gradient accumulation :cite:`nlp-nmt-ott2018scaling`. @@ -84,14 +84,29 @@ Next, we define all Neural Modules necessary for our model: decoder = nemo_nlp.nm.trainables.TransformerDecoderNM(**decoder_params) log_softmax = nemo_nlp.nm.trainables.TokenClassifier(**token_classifier_params) beam_search = nemo_nlp.nm.trainables.BeamSearchTranslatorNM(**beam_search_params) - loss = nemo_nlp.nm.losses.PaddedSmoothedCrossEntropyLossNM(**loss_params) + loss = nemo_nlp.nm.losses.SmoothedCrossEntropyLoss(pad_id=tgt_tokenizer.pad_id, label_smoothing=args.label_smoothing) Following `Press and Wolf, 2016 `_ :cite:`nlp-nmt-press2016using`, we also tie the parameters of embedding and softmax layers: .. code-block:: python - log_softmax.log_softmax.dense.weight = encoder.embedding_layer.token_embedding.weight - decoder.embedding_layer.token_embedding.weight = encoder.embedding_layer.token_embedding.weight + log_softmax.tie_weights_with( + encoder, + weight_names=["mlp.last_linear_layer.weight"], + name2name_and_transform={ + "mlp.last_linear_layer.weight": ("embedding_layer.token_embedding.weight", WeightShareTransform.SAME) + }, + ) + decoder.tie_weights_with( + encoder, + weight_names=["embedding_layer.token_embedding.weight"], + name2name_and_transform={ + "embedding_layer.token_embedding.weight": ( + "embedding_layer.token_embedding.weight", + WeightShareTransform.SAME, + ) + }, + ) .. note:: You should not tie the parameters if you use different tokenizers for source and target. @@ -102,9 +117,8 @@ in **source and target** tokens. .. 
code-block:: python - def create_pipeline(**args): - dataset = nemo_nlp.data.TranslationDataset(**translation_dataset_params) - data_layer = nemo_nlp.nm.data_layers.TranslationDataLayer(dataset) + def create_pipeline(**args):- + data_layer = nemo_nlp.nm.data_layers.TranslationDataLayer(**translation_datalayer_params) src, src_mask, tgt, tgt_mask, labels, sent_ids = data_layer() src_hiddens = encoder(input_ids=src, input_mask_src=src_mask) tgt_hiddens = decoder(input_ids_tgt=tgt, @@ -162,7 +176,7 @@ Finally, we define the optimization parameters and run the whole pipeline. warmup_steps=args.warmup_steps) nf.train(tensors_to_optimize=[train_loss], - callbacks=callbacks, + callbacks=[train_callback, eval_callback, ckpt_callback], optimizer=args.optimizer, lr_policy=lr_policy_fn, optimization_params={"num_epochs": max_num_epochs, @@ -175,7 +189,7 @@ Finally, we define the optimization parameters and run the whole pipeline. Model training -------------- -To train the Transformer-big model, run ``machine_translation_tutorial.py`` located at ``examples/nlp/machine_translation``: +To train the Transformer-big model, run ``machine_translation_tutorial.py`` located at ``examples/nlp/neural_machine_translation``: .. code-block:: python @@ -197,24 +211,22 @@ Translation with pretrained model 1. Put your saved checkpoint (or download good checkpoint which obtains 28.5 SacreBLEU on newstest2014 from `here `__) into ****. -2. Run ``machine_translation_tutorial.py`` in an interactive mode:: - python machine_translation_tutorial.py --src_tokenizer_model bpe8k_yttm.model \ - --eval_datasets test --optimizer novograd --d_model 1024 \ - --d_inner 4096 --num_layers 6 --num_attn_heads 16 \ - --restore_checkpoint_from --interactive +2. Run ``machine_translation_tutorial.py`` in an interactive mode: + .. code-block:: bash - .. image:: interactive_translation.png - :align: center + python machine_translation_tutorial.py --src_tokenizer_model bpe8k_yttm.model \ + --eval_datasets test --optimizer novograd --d_model 1024 \ + --d_inner 4096 --num_layers 6 --num_attn_heads 16 \ + --restore_checkpoint_from --interactive + + .. image:: interactive_translation.png References ---------- -References ------------------- - -.. bibliography:: nlp_all.bib +.. 
bibliography:: nlp_all_refs.bib :style: plain :labelprefix: NLP-NMT :keyprefix: nlp-nmt- diff --git a/docs/docs_zh/sources/source/nlp/nlp_all.bib b/docs/sources/source/nlp/nlp_all_refs.bib similarity index 50% rename from docs/docs_zh/sources/source/nlp/nlp_all.bib rename to docs/sources/source/nlp/nlp_all_refs.bib index d6eb32017e20..990aaed5489c 100644 --- a/docs/docs_zh/sources/source/nlp/nlp_all.bib +++ b/docs/sources/source/nlp/nlp_all_refs.bib @@ -129,4 +129,116 @@ @article{chen2019bert author={Chen, Qian and Zhuo, Zhu and Wang, Wen}, journal={arXiv preprint arXiv:1902.10909}, year={2019} -} \ No newline at end of file +} + + +@article{budzianowski2018multiwoz, + title={MultiWOZ-a large-scale multi-domain wizard-of-oz dataset for task-oriented dialogue modelling}, + author={Budzianowski, Pawe{\l} and Wen, Tsung-Hsien and Tseng, Bo-Hsiang and Casanueva, Inigo and Ultes, Stefan and Ramadan, Osman and Ga{\v{s}}i{\'c}, Milica}, + journal={arXiv preprint arXiv:1810.00278}, + year={2018} +} + +@article{eric2019multiwoz, + title={MultiWOZ 2.1: Multi-domain dialogue state corrections and state tracking baselines}, + author={Eric, Mihail and Goel, Rahul and Paul, Shachi and Sethi, Abhishek and Agarwal, Sanchit and Gao, Shuyag and Hakkani-Tur, Dilek}, + journal={arXiv preprint arXiv:1907.01669}, + year={2019} +} + + +@article{wu2019transferable, + title={Transferable multi-domain state generator for task-oriented dialogue systems}, + author={Wu, Chien-Sheng and Madotto, Andrea and Hosseini-Asl, Ehsan and Xiong, Caiming and Socher, Richard and Fung, Pascale}, + journal={arXiv preprint arXiv:1905.08743}, + year={2019} +} + + +@article{henderson2015machine, + title={Machine learning for dialog state tracking: A review}, + author={Henderson, Matthew}, + journal={research.google}, + year={2015} +} + +@article{shoeybi2020megatron, + title={Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism}, + author={Shoeybi, Mohammad and Patwary, Mostofa and Puri, Raul and LeGresley, Patrick and Casper, Jared and Catanzaro, Bryan}, + journal={arXiv preprint arXiv:1909.08053}, + year={2020} +} + +@article{rastogi2019towards, + title={Towards scalable multi-domain conversational agents: The schema-guided dialogue dataset}, + author={Rastogi, Abhinav and Zang, Xiaoxue and Sunkara, Srinivas and Gupta, Raghav and Khaitan, Pranav}, + journal={arXiv preprint arXiv:1909.05855}, + year={2019} +} + +@article{rastogi2020schema, + title={Schema-Guided Dialogue State Tracking Task at DSTC8}, + author={Rastogi, Abhinav and Zang, Xiaoxue and Sunkara, Srinivas and Gupta, Raghav and Khaitan, Pranav}, + journal={arXiv preprint arXiv:2002.01359}, + year={2020} +} + +@unpublished{wang2018glue + title={{GLUE}: A Multi-Task Benchmark and Analysis Platform for Natural Language Understanding}, + author={Wang, Alex and Singh, Amanpreet and Michael, Julian and Hill, Felix and Levy, Omer and Bowman, Samuel R.}, + note={arXiv preprint 1804.07461}, + year={2018} + } + +@article{warstadt2018neural, + title={Neural Network Acceptability Judgments}, + author={Warstadt, Alex and Singh, Amanpreet and Bowman, Samuel R}, + journal={arXiv preprint arXiv:1805.12471}, + year={2018} +} + +@inproceedings{socher2013recursive, + title={Recursive deep models for semantic compositionality over a sentiment treebank}, + author={Socher, Richard and Perelygin, Alex and Wu, Jean and Chuang, Jason and Manning, Christopher D and Ng, Andrew Y and Potts, Christopher}, + booktitle={Proceedings of the 2013 conference on empirical 
methods in natural language processing}, + pages={1631--1642}, + year={2013} +} + +@inproceedings{dolan-brockett-2005-automatically, + title = "Automatically Constructing a Corpus of Sentential Paraphrases", + author = "Dolan, William B. and + Brockett, Chris", + booktitle = "Proceedings of the Third International Workshop on Paraphrasing ({IWP}2005)", + year = "2005", + url = "https://www.aclweb.org/anthology/I05-5002", +} + +@article{cer2017semeval, + title={Semeval-2017 task 1: Semantic textual similarity-multilingual and cross-lingual focused evaluation}, + author={Cer, Daniel and Diab, Mona and Agirre, Eneko and Lopez-Gazpio, Inigo and Specia, Lucia}, + journal={arXiv preprint arXiv:1708.00055}, + year={2017} +} + +@article{williams2017broad, + title={A broad-coverage challenge corpus for sentence understanding through inference}, + author={Williams, Adina and Nangia, Nikita and Bowman, Samuel R}, + journal={arXiv preprint arXiv:1704.05426}, + year={2017} +} + +@article{rajpurkar2016squad, + title={Squad: 100,000+ questions for machine comprehension of text}, + author={Rajpurkar, Pranav and Zhang, Jian and Lopyrev, Konstantin and Liang, Percy}, + journal={arXiv preprint arXiv:1606.05250}, + year={2016} +} + +@inproceedings{levesque2012winograd, + title={The winograd schema challenge}, + author={Levesque, Hector and Davis, Ernest and Morgenstern, Leora}, + booktitle={Thirteenth International Conference on the Principles of Knowledge Representation and Reasoning}, + year={2012} +} + diff --git a/docs/sources/source/nlp/punctuation.rst b/docs/sources/source/nlp/punctuation.rst index 6834eced59a2..f2269196f2a5 100644 --- a/docs/sources/source/nlp/punctuation.rst +++ b/docs/sources/source/nlp/punctuation.rst @@ -7,6 +7,8 @@ An ASR system typically generates text with no punctuation and capitalization of .. tip:: We recommend you to try this example in Jupyter notebook examples/nlp/token_classification/PunctuationWithBERT.ipynb. + + All code used in this tutorial is based on :ref:`punct_scripts`. For pretraining BERT in NeMo and pretrained model checkpoints go to `BERT pretraining `__. @@ -26,7 +28,7 @@ Dataset This model can work with any dataset as long as it follows the format specified below. For this tutorial, we're going to use the `Tatoeba collection of sentences`_. `This`_ script downloads and preprocesses the dataset. .. _Tatoeba collection of sentences: https://tatoeba.org/eng -.. _This: https://github.com/NVIDIA/NeMo/blob/master/examples/nlp/scripts/get_tatoeba.py +.. _This: https://github.com/NVIDIA/NeMo/blob/master/examples/nlp/token_classification/get_tatoeba_data.py The training and evaluation data is divided into 2 files: text.txt and labels.txt. Each line of the text.txt file contains text sequences, where words are separated with spaces: @@ -84,7 +86,7 @@ To download and preprocess a subset of the Tatoeba collection of sentences, run: .. code-block:: bash - python ../scripts/get_tatoeba_data.py --data_dir DATA_DIR --num_sample NUM_SAMPLES + python get_tatoeba_data.py --data_dir DATA_DIR --num_sample NUM_SAMPLES Then, we need to create our neural factory with the supported backend. This tutorial assumes that you're training on a single GPU, with mixed precision (``optimization_level="O1"``). If you don't want to use mixed precision, set ``optimization_level`` to ``O0``. @@ -96,12 +98,19 @@ Then, we need to create our neural factory with the supported backend. 
This tuto log_dir=WORK_DIR, placement=nemo.core.DeviceType.GPU) -Next, we'll need to define our tokenizer and our BERT model. If you're using a standard BERT model, you should do it as follows. To see the full list of BERT model names, check out ``nemo_nlp.huggingface.BERT.list_pretrained_models()`` +Next, we'll need to define our tokenizer and our BERT model. Currently, there are 3 pretrained back-bone models supported: +BERT, ALBERT and RoBERTa. These are pretrained model checkpoints from `transformers `__ . Apart from these, the user can also do fine-tuning +on a custom BERT checkpoint, specified by the `--bert_checkpoint` argument in the training script. +The pretrained back-bone models can be specified `--pretrained_model_name`. +See the list of available pre-trained models by calling `nemo_nlp.nm.trainables.get_pretrained_lm_models_list()`. \ .. code-block:: python - tokenizer = NemoBertTokenizer(pretrained_model=PRETRAINED_BERT_MODEL) - bert_model = nemo_nlp.nm.trainables.huggingface.BERT( + bert_model = nemo_nlp.nm.trainables.get_pretrained_lm_model( + pretrained_model_name=PRETRAINED_BERT_MODEL) + + tokenizer = nemo.collections.nlp.data.tokenizers.get_tokenizer( + tokenizer_name="nemobert", pretrained_model_name=PRETRAINED_BERT_MODEL) Now, create the train and evaluation data layers: @@ -153,12 +162,11 @@ Now, create punctuation and capitalization classifiers to sit on top of the pret # If you don't want to use weighted loss for Punctuation task, use class_weights=None punct_label_freqs = train_data_layer.dataset.punct_label_frequencies - class_weights = utils.calc_class_weights(punct_label_freqs) + class_weights = nemo.collections.nlp.data.datasets.datasets_utils.calc_class_weights(punct_label_freqs) # define loss - punct_loss = TokenClassificationLoss(num_classes=len(punct_label_ids), - class_weights=class_weights) - capit_loss = TokenClassificationLoss(num_classes=len(capit_label_ids)) + punct_loss = CrossEntropyLossNM(logits_ndim=3, weight=class_weights) + capit_loss = CrossEntropyLossNM(logits_ndim=3) task_loss = LossAggregatorNM(num_inputs=2) @@ -335,21 +343,22 @@ Inference results: Query: the more you buy the more you save Combined: The more you buy, the more you save. +.. _punct_scripts: -To train the model with the provided scripts --------------------------------------------- +Training and inference scripts +------------------------------ To run the provided training script: .. code-block:: bash - python examples/nlp/token_classification/punctuation_capitalization.py --data_dir path/to/data --pretrained_bert_model=bert-base-uncased --work_dir output + python examples/nlp/token_classification/punctuation_capitalization.py --data_dir path_to_data --pretrained_model_name=bert-base-uncased --work_dir path_to_output_dir To run inference: .. code-block:: bash - python examples/nlp/token_classification/punctuation_capitalization_infer.py --punct_labels_dict path/to/data/punct_label_ids.csv --capit_labels_dict path/to/data/capit_label_ids.csv --work_dir output/checkpoints/ + python examples/nlp/token_classification/punctuation_capitalization_infer.py --punct_labels_dict path_to_data/punct_label_ids.csv --capit_labels_dict path_to_data/capit_label_ids.csv --checkpoint_dir path_to_output_dir/checkpoints/ Note, punct_label_ids.csv and capit_label_ids.csv files will be generated during training and stored in the data_dir folder. @@ -361,4 +370,4 @@ To run training on multiple GPUs, run .. 
code-block:: bash export NUM_GPUS=2 - python -m torch.distributed.launch --nproc_per_node=$NUM_GPUS examples/nlp/token_classification/punctuation_capitalization.py --num_gpus $NUM_GPUS --data_dir path/to/data + python -m torch.distributed.launch --nproc_per_node=$NUM_GPUS examples/nlp/token_classification/punctuation_capitalization.py --num_gpus $NUM_GPUS --data_dir path_to_data diff --git a/docs/sources/source/nlp/question_answering.rst b/docs/sources/source/nlp/question_answering.rst index 98ad59b76d7d..64d900e3db17 100644 --- a/docs/sources/source/nlp/question_answering.rst +++ b/docs/sources/source/nlp/question_answering.rst @@ -1,23 +1,53 @@ +.. _squad_model_links: + Tutorial ======== In this tutorial, we are going to implement a Question Answering system using the SQuAD dataset with pretrained BERT-like models based on `BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding `_ :cite:`nlp-qa-devlin2018bert`. -All code used in this tutorial is based on ``examples/nlp/question_answering/question_answering.py``. +All code used in this tutorial is based on ``examples/nlp/question_answering/question_answering_squad.py``. -Currently, there are 3 pretrained back-bone models supported, on which the question answering task SQuAD can be fine-tuned: -BERT, ALBERT and RoBERTa. These are pretrained model checkpoints from `transformers `__ . Apart from these, the user can also do fine-tuning +Currently, there are 4 pretrained back-bone models supported, on which the question answering task SQuAD can be fine-tuned: +Megatron-LM BERT, BERT, ALBERT and RoBERTa. These are pretrained model checkpoints from `transformers `__. Apart from these, the user can also do fine-tuning on a custom BERT checkpoint, specified by the `--bert_checkpoint` argument. -The pretrained back-bone models can be specified by `--model_type` and the specific model by `--pretrained_model_name`. +The pretrained back-bone models can be specified with `--pretrained_model_name`. See the list of available pre-trained models `here `__. +.. _pretrained_models_squad: + +Download pretrained models +-------------------------- + +Finetuned SQuAD models and model configuration files can be downloaded at the following links. + +BERT Base uncased models (~110M parameters) finetuned on SQuADv1.1 or SQuADv2.0 dataset: +`https://ngc.nvidia.com/catalog/models/nvidia:bertbaseuncasedsquadv1 `__ +`https://ngc.nvidia.com/catalog/models/nvidia:bertbaseuncasedsquadv2 `__ + +BERT Large uncased models (~330M parameters) finetuned on SQuADv1.1 or SQuADv2.0 dataset: +`https://ngc.nvidia.com/catalog/models/nvidia:bertlargeuncasedsquadv1 `__ +`https://ngc.nvidia.com/catalog/models/nvidia:bertlargeuncasedsquadv2 `__ + + .. tip:: For pretraining BERT in NeMo and pretrained model checkpoints go to `BERT pretraining `__. +Model results: ++---------------------------------------------+--------+--------+--------+--------+ +| | SQuADv1.1 | SQuADv2.0 | ++ +--------+--------+--------+--------+ +| Model | EM | F1 | EM | F1 | ++=============================================+========+========+========+========+ +| BERT-base-uncased | 82.74% | 89.79% | 71.24% | 74.32% | ++---------------------------------------------+--------+--------+--------+--------+ +| BERT-large-uncased | 85.79% | 92.28% | 80.17% | 83.32% | ++---------------------------------------------+--------+--------+--------+--------+ + +On a DGX-1 with 8 V100 16GB GPUs, training on SQuADv1.1 with the default script parameters takes between 14 and 18 minutes.
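+To sanity-check a downloaded checkpoint, the sketch below runs the script in evaluation mode on the SQuADv1.1 dev set. This is only an illustration: the data and checkpoint paths are placeholders (the actual layout of the unpacked NGC archive may differ), so point ``--checkpoint_dir`` and ``--eval_file`` at your own locations.
+
+.. code-block:: bash
+
+    cd examples/nlp/question_answering
+    # Evaluate a downloaded SQuADv1.1 checkpoint on the dev set.
+    # /path_to_checkpoint_folder and /path_to_data_dir are placeholders.
+    python question_answering_squad.py \
+        --eval_file /path_to_data_dir/squad/v1.1/dev-v1.1.json \
+        --pretrained_model_name bert-base-uncased \
+        --checkpoint_dir /path_to_checkpoint_folder \
+        --mode "eval" \
+        --output_prediction_file predictions.json
+
+This should roughly reproduce the dev set numbers reported above; the full set of training, evaluation and inference arguments is described in the sections below.
+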
Preliminaries ------------- @@ -36,16 +66,19 @@ This model can work with any dataset that follows the format: * training file: a `json` file of this structure - {"data":[{"title": "string", "paragraphs": [{"context": "string", "qas": [{"question": "string", "id": "number", "answers": [{"answer_start": "number", "text": "string", }]}]}]}]} + {"data":[{"title": "string", "paragraphs": [{"context": "string", "qas": [{"question": "string", "is_impossible": "bool", "id": "number", "answers": [{"answer_start": "number", "text": "string", }]}]}]}]} + "answers" can also be empty if the model should also learn questions with impossible answers. In this case pass `--version_2_with_negative` * evaluation file: a `json` file that follows the training file format - only that it can provide more than one answer to the same question + only that it can provide more than one entry for "answers" to the same question + * test file: a `json` file that follows the training file format + only that it does not require the "answers" keyword. Currently, the datasets that we provide pre-processing script for is SQuAD v1.1 and v2.0 which can be downloaded from `https://rajpurkar.github.io/SQuAD-explorer/ `_. -You can find the pre-processing script in ``examples/nlp/scripts/get_squad.py``. +You can find the pre-processing script in ``examples/nlp/question_answering/get_squad.py``. Code structure @@ -66,21 +99,6 @@ First, we instantiate Neural Module Factory which defines 1) backend (PyTorch), files_to_copy=[__file__], add_time_to_log_dir=True) -We define the tokenizer which transforms text into BERT tokens, using `NemoBertTokenizer`. -This will tokenize text following the mapping of the original BERT model. - - .. code-block:: python - - hidden_size = model.hidden_size - tokenizer = nemo_nlp.data.NemoBertTokenizer(bert_derivate='bert', pretrained_model="bert-base-uncased") - # to use RoBERTa tokenizer, run e.g. - special_tokens_roberta = nemo_nlp.utils.MODEL_SPECIAL_TOKENS['roberta'] - tokenizer = nemo_nlp.data.NemoBertTokenizer(bert_derivate='roberta', pretrained_model="roberta-base", special_tokens=special_tokens_roberta) - # to use Albert tokenizer, run e.g. - special_tokens_albert = nemo_nlp.utils.MODEL_SPECIAL_TOKENS['albert'] - tokenizer = nemo_nlp.data.NemoBertTokenizer(bert_derivate='albert', pretrained_model="albert-base-v1", special_tokens=special_tokens_albert) - - Next, we define all Neural Modules participating in our question answering classification pipeline. * Process data: the `BertQuestionAnsweringDataLayer` is supposed to do the preprocessing of raw data into the format data supported by `SquadDataset`. @@ -91,25 +109,27 @@ Next, we define all Neural Modules participating in our question answering class .. 
code-block:: python data_layer = nemo_nlp.nm.data_layers.BertQuestionAnsweringDataLayer( + mode="train", data_file=args.train_file, tokenizer=tokenizer, batch_size=args.batch_size, - mode='train', version_2_with_negative=args.version_2_with_negative, max_query_length=args.max_query_length, max_seq_length=args.max_seq_length, - doc_stride=args.doc_stride) + doc_stride=args.doc_stride, + use_cache=args.use_data_cache) data_layer_eval = nemo_nlp.nm.data_layers.BertQuestionAnsweringDataLayer( - data_file=args.dev_file, + mode='eval', + data_file=args.eval_file, tokenizer=tokenizer, batch_size=args.batch_size, - mode='dev', version_2_with_negative=args.version_2_with_negative, max_query_length=args.max_query_length, max_seq_length=args.max_seq_length, - doc_stride=args.doc_stride) + doc_stride=args.doc_stride, + use_cache=args.use_data_cache) * Load the pretrained model and get the hidden states for the corresponding inputs. @@ -124,6 +144,14 @@ Next, we define all Neural Modules participating in our question answering class args.pretrained_model_name = "albert-base-v1" model = nemo_nlp.nm.trainables.huggingface.Albert(args.pretrained_model_name) + * Define the tokenizer which transforms text into BERT tokens, using `NemoBertTokenizer`. This will tokenize text following the mapping of the original BERT model. + + .. code-block:: python + + hidden_size = model.hidden_size + tokenizer = nemo_nlp.data.NemoBertTokenizer(pretrained_model=args.pretrained_model_name) + + * Create the classifier head for our task. .. code-block:: python @@ -138,7 +166,7 @@ Next, we define all Neural Modules participating in our question answering class .. code-block:: python - loss_fn = nemo_nlp.nm.losses.QuestionAnsweringLoss() + loss_fn = nemo_nlp.nm.losses.SpanningLoss() * Create the pipelines for the train and evaluation processes. @@ -170,8 +198,7 @@ Next, we define all Neural Modules participating in our question answering class logits=qa_logits_eval, start_positions=input_data_eval.start_positions, end_positions=input_data_eval.end_positions) - eval_tensors = [loss_outputs_eval.start_logits, loss_outputs_eval.end_logits, - input_data_eval.unique_ids] + eval_tensors = [input_data_eval.unique_ids, loss_outputs_eval.start_logits, loss_outputs_eval.end_logits] @@ -225,44 +252,58 @@ Next, we define all Neural Modules participating in our question answering class Model training -------------- +To run on a single GPU, run: + + .. code-block:: python + + python question_answering_squad.py \ + ... + To train a question answering model on SQuAD using multi-gpu, run ``question_answering_squad.py`` located at ``examples/nlp/question_answering``: .. code-block:: python python -m torch.distributed.launch --nproc_per_node=8 question_answering_squad.py --train_file - --dev_file + --eval_file --num_gpus 8 --work_dir --amp_opt_level --pretrained_model_name + --bert_checkpoint + --bert_config + --mode "train_eval" ... -To do inference, run: +For model configuration files and checkpoints, see :ref:`pretrained_models_squad`. + +To run evaluation: .. code-block:: python - python -m torch.distributed.launch --nproc_per_node=8 question_answering_squad.py - --dev_file - --num_gpus 8 - --checkpoint_dir - --evaluation_only + python question_answering_squad.py + --eval_file + --checkpoint_dir + --mode "eval" --output_prediction_file ... +To run inference: -To run on a single GPU, run: - .. 
code-block:: python - python question_answering_squad.py \ + python question_answering_squad.py + --test_file + --checkpoint_dir + --mode "test" + --output_prediction_file ... References ---------- -.. bibliography:: nlp_all.bib +.. bibliography:: nlp_all_refs.bib :style: plain :labelprefix: NLP-QA :keyprefix: nlp-qa- \ No newline at end of file diff --git a/docs/sources/source/nlp/transformer_language_model.rst b/docs/sources/source/nlp/transformer_language_model.rst index 02ef4d502501..011a93763a1c 100644 --- a/docs/sources/source/nlp/transformer_language_model.rst +++ b/docs/sources/source/nlp/transformer_language_model.rst @@ -1,5 +1,5 @@ -Transformer Language Model -========================== +Tutorial +======== In this tutorial, we will build and train a language model using the Transformer architecture :cite:`nlp-lm-vaswani2017attention`. Make sure you have ``nemo`` and ``nemo_nlp`` installed before starting this tutorial. See the :ref:`installation` section for more details. @@ -15,7 +15,7 @@ Download Corpus For demonstration purposes, we will be using the very small WikiText-2 dataset :cite:`nlp-lm-merity2016pointer`. -To download the dataset, run the script ``examples/nlp/scripts/get_wt2.sh``. After downloading and unzipping, the folder should include 3 files that look like this: +To download the dataset, run the script ``examples/nlp/language_modeling/get_wkt2.sh ``. After downloading and unzipping, the folder ```` should include 3 files that look like this: .. code-block:: bash @@ -29,13 +29,15 @@ Create the tokenizer model .. code-block:: python + from nemo.collections.nlp.data.datasets.lm_transformer_dataset import LanguageModelDataDesc data_desc = LanguageModelDataDesc( args.dataset_name, args.data_dir, args.do_lower_case) -We need to define our tokenizer. We use `WordTokenizer` defined in ``nemo_nlp/data/tokenizers/word_tokenizer.py``: +We need to define our tokenizer. We use `WordTokenizer` defined in ``nemo/collections/nlp/data/tokenizers/word_tokenizer.py``: .. code-block:: python + import nemo.collections.nlp as nemo_nlp tokenizer = nemo_nlp.WordTokenizer(f"{args.data_dir}/{args.tokenizer_model}") vocab_size = 8 * math.ceil(tokenizer.vocab_size / 8) @@ -65,51 +67,72 @@ Next, we define all Neural Modules necessary for our model .. code-block:: python - encoder = nemo_nlp.TransformerEncoderNM(**params) - log_softmax = nemo_nlp.TokenClassifier(**params) - loss = nemo_nlp.PaddedSmoothedCrossEntropyLossNM(**params) - + from nemo.collections.nlp.nm.trainables.common import TokenClassifier + from nemo.collections.nlp.nm.losses import SmoothedCrossEntropyLoss + + encoder = nemo_nlp.nm.trainables.TransformerEncoderNM( + d_model=args.d_model, + d_inner=args.d_inner, + num_layers=args.num_layers, + embedding_dropout=args.embedding_dropout, + num_attn_heads=args.num_attn_heads, + ffn_dropout=args.ffn_dropout, + vocab_size=vocab_size, + mask_future=True, + attn_score_dropout=args.attn_score_dropout, + attn_layer_dropout=args.attn_layer_dropout, + max_seq_length=args.max_seq_length, + ) + + log_softmax = TokenClassifier( + args.d_model, num_classes=vocab_size, num_layers=1, log_softmax=True + ) + + loss = SmoothedCrossEntropyLoss(pad_id=tokenizer.pad_id, label_smoothing=args.label_smoothing) Following `Press and Wolf, 2016 `_ :cite:`nlp-lm-press2016using`, we also tie the parameters of embedding and softmax layers: .. 
code-block:: python - log_softmax.mlp.layers[-1].weight = encoder.embedding_layer.token_embedding.weight - + from nemo.core import WeightShareTransform + log_softmax.tie_weights_with( + encoder, + weight_names=["mlp.layer0.weight"], + name2name_and_transform={ + "mlp.layer0.weight": ("embedding_layer.token_embedding.weight", WeightShareTransform.SAME) + }, + ) -Next, we create datasets for training and evaluating: +Then, we create the pipeline from input to output that can be used for both training and evaluation: .. code-block:: python - train_dataset = nemo_nlp.LanguageModelingDataset( - tokenizer, - dataset=f"{args.data_dir}/{args.train_dataset}", - max_sequence_length=args.max_sequence_length, - batch_step=args.max_sequence_length) - - eval_dataset = nemo_nlp.LanguageModelingDataset( - tokenizer, - dataset=f"{args.data_dir}/{args.eval_datasets[0]}", - max_sequence_length=args.max_sequence_length, - batch_step=args.predict_last_k) - - -Then, we create the pipeline gtom input to output that can be used for both training and evaluation: - - .. code-block:: python + from nemo.collections.nlp.nm.data_layers import LanguageModelingDataLayer - def create_pipeline(dataset, batch_size): - data_layer = nemo_nlp.LanguageModelingDataLayer(dataset, - batch_size=batch_size) + def create_pipeline( + dataset, max_seq_length=args.max_seq_length, batch_step=args.max_seq_length, batch_size=args.batch_size + ): + data_layer = LanguageModelingDataLayer( + dataset, tokenizer, max_seq_length, batch_size, batch_step + ) src, src_mask, labels = data_layer() src_hiddens = encoder(input_ids=src, input_mask_src=src_mask) logits = log_softmax(hidden_states=src_hiddens) - return loss(logits=logits, target_ids=labels) - - - train_loss = create_pipeline(train_dataset, args.batch_size) - eval_loss = create_pipeline(eval_dataset, args.batch_size) - + return loss(logits=logits, labels=labels) + + + train_loss = create_pipeline( + f"{args.data_dir}/{args.train_dataset}", + args.max_seq_length, + batch_step=args.max_seq_length, + batch_size=args.batch_size, + ) + eval_loss = create_pipeline( + f"{args.data_dir}/{args.eval_dataset}", + args.max_seq_length, + batch_step=args.predict_last_k, + batch_size=args.eval_batch_size, + ) Next, we define necessary callbacks: @@ -119,34 +142,59 @@ Next, we define necessary callbacks: .. code-block:: python - train_callback = nemo.core.SimpleLossLoggerCallback(...) - eval_callback = nemo.core.EvaluatorCallback(...) - ckpt_callback = nemo.core.CheckpointCallback(...) - + from nemo.collections.nlp.callbacks.lm_transformer_callback import eval_epochs_done_callback, eval_iter_callback + train_callback = SimpleLossLoggerCallback( + tensors=train_tensors, + print_func=lambda x: logging.info(str(round(x[0].item(), 3))), + tb_writer=nf.tb_writer, + get_tb_values=lambda x: [["loss", x[0]]], + step_freq=steps_per_epoch, + ) + + eval_callback = nemo.core.EvaluatorCallback( + eval_tensors=eval_tensors, + user_iter_callback=lambda x, y: eval_iter_callback(x, y, data_layer), + user_epochs_done_callback=lambda x: eval_epochs_done_callback(x, f'{nf.work_dir}/graphs'), + tb_writer=nf.tb_writer, + eval_step=steps_per_epoch, + ) + + # Create callback to save checkpoints + ckpt_callback = CheckpointCallback( + folder=nf.checkpoint_dir, epoch_freq=args.save_epoch_freq, step_freq=args.save_step_freq + ) Finally, you should define your optimizer, and start training! .. 
code-block:: python - lr_policy_fn = get_lr_policy(args.lr_policy, - total_steps=args.num_epochs * steps_per_epoch, - warmup_ratio=args.lr_warmup_proportion) - - nf.train(tensors_to_optimize=[train_loss], - callbacks=callbacks, - lr_policy=lr_policy_fn, - batches_per_step=args.iter_per_step, - optimizer=args.optimizer_kind, - optimization_params={"num_epochs": args.num_epochs, - "lr": args.lr, - "weight_decay": args.weight_decay, - "betas": (args.beta1, args.beta2)}) - + from nemo.utils.lr_policies import CosineAnnealing + + lr_policy_fn = CosineAnnealing(args.max_steps, warmup_steps=args.warmup_steps) + max_num_epochs = 0 if args.interactive else args.num_epochs + + callbacks = [ckpt_callback] + if not args.interactive: + callbacks.extend([train_callback, eval_callback]) + + nf.train( + tensors_to_optimize=[train_loss], + callbacks=callbacks, + lr_policy=lr_policy_fn, + batches_per_step=args.iter_per_step, + optimizer=args.optimizer_kind, + optimization_params={ + "num_epochs": args.num_epochs, + "lr": args.lr, + "weight_decay": args.weight_decay, + "betas": (args.beta1, args.beta2), + }, + ) References ---------- -.. bibliography:: nlp_all.bib +.. bibliography:: nlp_all_refs.bib :style: plain :labelprefix: NLP-LM :keyprefix: nlp-lm- diff --git a/docs/sources/source/speaker_recognition/datasets.rst b/docs/sources/source/speaker_recognition/datasets.rst new file mode 100644 index 000000000000..2e81b8fe3830 --- /dev/null +++ b/docs/sources/source/speaker_recognition/datasets.rst @@ -0,0 +1,19 @@ +Datasets +======== + +HI-MIA +-------- + +Run the script to download and process the hi-mia dataset in order to generate files in the supported format of `nemo_asr`. You should set the data folder of +hi-mia using `--data_root`. These scripts are present in /scripts. + +.. code-block:: bash + + python get_hi-mia_data.py --data_root= + +After download and conversion, your `data` folder should contain directories with the following set of files: + +* `data//train.json` +* `data//dev.json` +* `data//{set}_all.json` +* `data//utt2spk` \ No newline at end of file diff --git a/docs/sources/source/speaker_recognition/installation_link.rst b/docs/sources/source/speaker_recognition/installation_link.rst new file mode 100644 index 000000000000..cef1c53239c9 --- /dev/null +++ b/docs/sources/source/speaker_recognition/installation_link.rst @@ -0,0 +1 @@ +.. include:: ../asr/installation.rst diff --git a/docs/sources/source/speaker_recognition/intro.rst b/docs/sources/source/speaker_recognition/intro.rst new file mode 100644 index 000000000000..242d1a6c4db2 --- /dev/null +++ b/docs/sources/source/speaker_recognition/intro.rst @@ -0,0 +1,17 @@ +.. _speaker-recognition-docs: + + +Speaker Recognition +=================== + +.. toctree:: + :maxdepth: 8 + + installation_link + tutorial + datasets + models + + + + diff --git a/docs/sources/source/speaker_recognition/models.rst b/docs/sources/source/speaker_recognition/models.rst new file mode 100644 index 000000000000..0a577d8758dd --- /dev/null +++ b/docs/sources/source/speaker_recognition/models.rst @@ -0,0 +1,8 @@ +Models +==================== + +.. toctree:: + :maxdepth: 8 + + quartznet + diff --git a/docs/sources/source/speaker_recognition/quartznet.rst b/docs/sources/source/speaker_recognition/quartznet.rst new file mode 100644 index 000000000000..d16ab172f86b --- /dev/null +++ b/docs/sources/source/speaker_recognition/quartznet.rst @@ -0,0 +1,32 @@ +QuartzNet +--------- + +QuartzNet is a version of Jasper that utilizes separable convolutions and larger filters.
It can achieve performance +similar to Jasper but with an order of magnitude less parameters. +Similar to Jasper, QuartzNet family of models are denoted as QuartzNet_[BxR] where B is the number of blocks, and R - +the number of convolutional sub-blocks within a block. Each sub-block contains a 1-D *separable* convolution, batch +normalization, ReLU, and dropout: + +We use a Quartznet 3x2 model with narrow filters. This encoder is connected to the decoder by using a statistics pooling layer. +We experimented with various statistics pooling layers including a gram layer, a x-vector pooling layer, or a super vector layer which is a combination of the gram and x-vector layers. +The xvector layer is based on mean and variance based statistics pooling, it is faster to train and very stable. + + .. image:: ../asr/quartz_vertical.png + :align: center + :alt: quartznet model + +`QuartzNet paper `_. + +For a dataset with ~400 hours, this model should finish 25 epochs in under 8 hours on single Quadro GV100. + +============== ================= ===================== ====================== ========== +Network Trained Evaluated cosine similarity PLDA + Dataset trial-set EER EER +============== ================= ===================== ====================== ========== +QuartzNet3x2 hi-mia hi-mia 8.72% 6.32% +QuartzNet3x2 voxceleb1 ffsvc-dev 14.22% 7.12% + hi-mia + aishell + voxceleb2 +============== ================= ===================== ====================== ========== + diff --git a/docs/sources/source/speaker_recognition/speaker.bib b/docs/sources/source/speaker_recognition/speaker.bib new file mode 100644 index 000000000000..4a4a3f00a6f4 --- /dev/null +++ b/docs/sources/source/speaker_recognition/speaker.bib @@ -0,0 +1,30 @@ +@article{kriman2019quartznet, + title={Quartznet: Deep automatic speech recognition with 1d time-channel separable convolutions}, + author={Kriman, Samuel and Beliaev, Stanislav and Ginsburg, Boris and Huang, Jocelyn and Kuchaiev, Oleksii and Lavrukhin, Vitaly and Leary, Ryan and Li, Jason and Zhang, Yang}, + journal={arXiv preprint arXiv:1910.10261}, + year={2019} +} + +@article{nagrani2017voxceleb, + title={Voxceleb: a large-scale speaker identification dataset}, + author={Nagrani, Arsha and Chung, Joon Son and Zisserman, Andrew}, + journal={arXiv preprint arXiv:1706.08612}, + year={2017} +} + +@misc{himia, + title={HI-MIA : A Far-field Text-Dependent Speaker Verification Database and the Baselines}, + author={Xiaoyi Qin and Hui Bu and Ming Li}, + year={2019}, + eprint={1912.01231}, + archivePrefix={arXiv}, + primaryClass={cs.SD} +} + + +@article{li2019jasper, + title={Jasper: An end-to-end convolutional neural acoustic model}, + author={Li, Jason and Lavrukhin, Vitaly and Ginsburg, Boris and Leary, Ryan and Kuchaiev, Oleksii and Cohen, Jonathan M and Nguyen, Huyen and Gadde, Ravi Teja}, + journal={arXiv preprint arXiv:1904.03288}, + year={2019} +} \ No newline at end of file diff --git a/docs/sources/source/speaker_recognition/tutorial.rst b/docs/sources/source/speaker_recognition/tutorial.rst new file mode 100644 index 000000000000..d3cfbc5eb838 --- /dev/null +++ b/docs/sources/source/speaker_recognition/tutorial.rst @@ -0,0 +1,44 @@ +Tutorial +======== + +Make sure you have installed ``nemo`` and the ``nemo_asr`` collection. +See the :ref:`installation` section. + +.. note:: + + You need to have ``nemo`` and the ``nemo_asr`` collection for this tutorial. + It is also necessary to install `torchaudio` in order to use MFCC preprocessing. 
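+
+If ``torchaudio`` is not installed yet, it can typically be added to the same environment with pip (this is a general suggestion, not a NeMo-specific command; make sure the build matches your installed ``torch`` version):
+
+.. code-block:: bash
+
+    pip install torchaudio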
+ + +Introduction +------------ + +Speaker Recognition (SR) is a broad research area which solves two major tasks: speaker identification (who is speaking?) and +speaker verification (is the speaker who they claim to be?). In this work, we focus on far-field, +text-independent speaker recognition when the identity of the speaker is based on how the speech is spoken, +not necessarily on what is being said. Typically, such SR systems operate on unconstrained speech utterances, +which are converted into vectors of fixed length, called speaker embeddings. Speaker embeddings are also used in +automatic speech recognition (ASR) and speech synthesis. + +As the goal of most speaker-related systems is to get good speaker-level embeddings that could help distinguish them from +other speakers, we shall first train these embeddings in an end-to-end +manner, optimizing the QuartzNet-based :cite:`speaker-tut-kriman2019quartznet` encoder model on cross-entropy loss. +We modify the decoder to get these fixed-size embeddings irrespective of the length of the input audio. We employ a mean and variance +based statistics pooling method to obtain these embeddings. + +In this tutorial, we shall first train these embeddings on speaker-related datasets. Then, we get speaker embeddings from a +pretrained network for a new dataset. Lastly, we score them using cosine similarity or optionally with a PLDA backend. + + +Jupyter Notebooks containing all the steps to download the dataset, train a model and evaluate its results +are available at: `Speaker Recognition an4 example `_ +For advanced setups, try the tutorial utilizing the hi-mia dataset at: `Speaker Recognition hi-mia example `_ + + +References +---------- + +.. bibliography:: speaker.bib + :style: plain + :labelprefix: SPEAKER-TUT + :keyprefix: speaker-tut- diff --git a/docs/sources/source/speech_command/datasets.rst b/docs/sources/source/speech_command/datasets.rst new file mode 100644 index 000000000000..d59ec9d6a2bb --- /dev/null +++ b/docs/sources/source/speech_command/datasets.rst @@ -0,0 +1,42 @@ +Datasets +======== + +.. _GoogleSpeechCommands_dataset: + + +Google Speech Commands Dataset +----------------------------------- + +The ability to recognize spoken commands with high accuracy can be useful in a variety of contexts. +To this end, Google released the Speech Commands dataset (see :cite:`speech-recognition-dataset-warden2018speech`), +which contains short audio clips of a fixed number of command words such as “stop”, “go”, “up”, “down”, etc., spoken by a large number of speakers. +To promote the use of the set, Google also hosted a Kaggle competition, in which the winning team attained a multi-class accuracy of 91%. + +We experimented with applying NeMo’s ASR classification models on mel spectrograms of the audio clips and found that they worked surprisingly well. +Adding data augmentation further improved the results. + +Dataset +----------------------------------- + +Google released two versions of the dataset with the first version containing 65k samples over 30 classes and the second containing 110k samples over 35 classes. +We refer to these datasets as v1 and v2, and currently we have metrics for the v1 version in order to compare to the different metrics used by other papers. + +Run the script `process_speech_commands_data.py` to process the Google Speech Commands dataset in order to generate files in the supported format of `nemo_asr`, +which can be found in the `scripts` sub-directory of the nemo base directory.
You should set the data folder of Speech Commands using `--data_root` and the version of the dataset using `--data_version` as an int. + +You can further rebalance the train set by passing the `--rebalance` flag. + +.. code-block:: bash + + python process_speech_commands_data.py --data_root= --data_version=<1 or 2> {--rebalance} + +Then, you should have `train_manifest.json`, `validation_manifest.json` and `test_manifest.json` +in the directory `{data_root}/google_speech_recognition_v{1/2}`. + +References +---------- + +.. bibliography:: speech_recognition_all.bib + :style: plain + :labelprefix: SPEECH-RECOGNITION-DATASET + :keyprefix: speech-recognition-dataset- \ No newline at end of file diff --git a/docs/sources/source/speech_command/installation_link.rst b/docs/sources/source/speech_command/installation_link.rst new file mode 100644 index 000000000000..cef1c53239c9 --- /dev/null +++ b/docs/sources/source/speech_command/installation_link.rst @@ -0,0 +1 @@ +.. include:: ../asr/installation.rst diff --git a/docs/sources/source/speech_command/intro.rst b/docs/sources/source/speech_command/intro.rst new file mode 100644 index 000000000000..3e597be9a62c --- /dev/null +++ b/docs/sources/source/speech_command/intro.rst @@ -0,0 +1,16 @@ +.. _speech-command-docs: + + +Speech Commands +================== + +.. toctree:: + :maxdepth: 8 + + installation_link + tutorial + datasets + models + + + diff --git a/docs/sources/source/speech_command/models.rst b/docs/sources/source/speech_command/models.rst new file mode 100644 index 000000000000..0a577d8758dd --- /dev/null +++ b/docs/sources/source/speech_command/models.rst @@ -0,0 +1,8 @@ +Models +==================== + +.. toctree:: + :maxdepth: 8 + + quartznet + diff --git a/docs/sources/source/speech_command/quartz_vertical.png b/docs/sources/source/speech_command/quartz_vertical.png new file mode 100644 index 000000000000..39ef7534c783 Binary files /dev/null and b/docs/sources/source/speech_command/quartz_vertical.png differ diff --git a/docs/sources/source/speech_command/quartznet.rst b/docs/sources/source/speech_command/quartznet.rst new file mode 100644 index 000000000000..e55bef6a9c9b --- /dev/null +++ b/docs/sources/source/speech_command/quartznet.rst @@ -0,0 +1,38 @@ +QuartzNet +--------- + +QuartzNet is a version of Jasper :cite:`speech-recognition-models-li2019jasper` model with separable convolutions and larger filters. It can achieve performance +similar to Jasper but with an order of magnitude less parameters. +Similarly to Jasper, QuartzNet family of models are denoted as QuartzNet_[BxR] where B is the number of blocks, and R - the number of convolutional sub-blocks within a block. Each sub-block contains a 1-D *separable* convolution, batch normalization, ReLU, and dropout: + +These models are trained on Google Speech Commands dataset (V1 - all 30 classes). + + .. image:: quartz_vertical.png + :align: center + :alt: quartznet model + +`QuartzNet paper `_. + +These QuartzNet models were trained for 200 epochs using mixed precision on 2 GPUs with a batch size of 128 over 200 epochs. +On 2 Quadro GV100 GPUs, training time is approximately 1 hour. 
+ +=============================== ===================== ============ +Network Dataset Results +=============================== ===================== ============ +QuartzNet3x1 (77k params) Speech Commands V1 97.32% Test + +QuartzNet3x2 (93k params) Speech Commands V1 97.69% Test + +QuartzNet3x1 (77k params) Speech Commands V2 97.12% Test + +QuartzNet3x2 (93k params) Speech Commands V2 97.29% Test +=============================== ===================== ============ + + +References +^^^^^^^^^^ + +.. bibliography:: speech_recognition_all.bib + :style: plain + :labelprefix: SPEECH-RECOGNITION-MODELS + :keyprefix: speech-recognition-models- \ No newline at end of file diff --git a/docs/sources/source/speech_command/speech_recognition_all.bib b/docs/sources/source/speech_command/speech_recognition_all.bib new file mode 100644 index 000000000000..a358cf2a70c9 --- /dev/null +++ b/docs/sources/source/speech_command/speech_recognition_all.bib @@ -0,0 +1,50 @@ + +@inproceedings{hu2018squeeze, + title={Squeeze-and-excitation networks}, + author={Hu, Jie and Shen, Li and Sun, Gang}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={7132--7141}, + year={2018} +} + +@article{karim2019multivariate, + title={Multivariate lstm-fcns for time series classification}, + author={Karim, Fazle and Majumdar, Somshubra and Darabi, Houshang and Harford, Samuel}, + journal={Neural Networks}, + volume={116}, + pages={237--245}, + year={2019}, + publisher={Elsevier} +} + +@article{warden2018speech, + title={Speech commands: A dataset for limited-vocabulary speech recognition}, + author={Warden, Pete}, + journal={arXiv preprint arXiv:1804.03209}, + year={2018} +} + +@article{kriman2019quartznet, + title={Quartznet: Deep automatic speech recognition with 1d time-channel separable convolutions}, + author={Kriman, Samuel and Beliaev, Stanislav and Ginsburg, Boris and Huang, Jocelyn and Kuchaiev, Oleksii and Lavrukhin, Vitaly and Leary, Ryan and Li, Jason and Zhang, Yang}, + journal={arXiv preprint arXiv:1910.10261}, + year={2019} +} + +@article{park2019, + author = {{Park}, Daniel S. and {Chan}, William and {Zhang}, Yu and + {Chiu}, Chung-Cheng and {Zoph}, Barret and {Cubuk}, Ekin D. and + {Le}, Quoc V.}, + title = "{SpecAugment: A Simple Data Augmentation Method for Automatic Speech Recognition}", + journal = {arXiv e-prints}, + year = "2019", + eid = {arXiv:1904.08779}, + eprint = {1904.08779}, +} + +@article{li2019jasper, + title={Jasper: An End-to-End Convolutional Neural Acoustic Model}, + author={Li, Jason and Lavrukhin, Vitaly and Ginsburg, Boris and Leary, Ryan and Kuchaiev, Oleksii and Cohen, Jonathan M and Nguyen, Huyen and Gadde, Ravi Teja}, + journal={arXiv preprint arXiv:1904.03288}, + year={2019} +} \ No newline at end of file diff --git a/docs/sources/source/speech_command/temporal_se.png b/docs/sources/source/speech_command/temporal_se.png new file mode 100644 index 000000000000..3399d8ebd6f8 Binary files /dev/null and b/docs/sources/source/speech_command/temporal_se.png differ diff --git a/docs/sources/source/speech_command/tutorial.rst b/docs/sources/source/speech_command/tutorial.rst new file mode 100644 index 000000000000..bd0aa38814bb --- /dev/null +++ b/docs/sources/source/speech_command/tutorial.rst @@ -0,0 +1,657 @@ +Tutorial +======== + +Make sure you have installed ``nemo`` and the ``nemo_asr`` collection. +See the :ref:`installation` section. + +.. note:: + You need to have ``nemo`` and the ``nemo_asr`` collection for this tutorial. 
+ It is also necessary to install `torchaudio` in order to use MFCC preprocessing. + + +Introduction +------------ + +Speech Command Recognition is the task of classifying an input audio pattern into a discrete set of classes. +It is a subset of Automatic Speech Recognition, sometimes referred to as Key Word Spotting, in which a model is constantly analyzing speech patterns to detect certain "command" classes. +Upon detection of these commands, a specific action can be taken by the system. It is often the objective of command recognition models to be small and efficient, so that they can be deployed onto +low power sensors and remain active for long durations of time. + +This Speech Command recognition tutorial is based on the QuartzNet model :cite:`speech-recognition-tut-kriman2019quartznet` with +a modified decoder head to suit classification tasks. Instead of predicting a token for each time step of the input, we predict +a single label for the entire duration of the audio signal. This is accomplished by a decoder head that performs Global Max / Average pooling +across all timesteps prior to classification. After this, the model can be trained via standard categorical cross-entropy loss. + +1. Audio preprocessing (feature extraction): signal normalization, windowing, (log) spectrogram (or mel scale spectrogram, or MFCC) +2. Data augmentation using SpecAugment :cite:`speech-recognition-tut-park2019` to increase number of data samples. +3. Develop a small Neural classification model which can be trained efficiently. + +.. note:: + A Jupyter Notebook containing all the steps to download the dataset, train a model and evaluate its results + is available at : `Speech Commands Using NeMo `_ + +Data Preparation +---------------- + +We will be using the open source Google Speech Commands Dataset (we will use V1 of the dataset for the tutorial, but require +very minor changes to support V2 dataset). These scripts below will download the dataset and convert it to a format suitable +for use with `nemo_asr`: + + +.. code-block:: bash + + mkdir data + # process_speech_commands_data.py script is located under /scripts + # The `--rebalance` flag will duplicate elements in the train set so that all classes + # have the same number of elements. It is not mandatory to add this flag. + python process_speech_commands_data.py --data_root=data --data_version=1 --rebalance + +.. note:: + You should have at least 4GB of disk space available if you've used ``--data_version=1``; and at least 6GB if you used ``--data_version=2``. Also, it will take some time to download and process, so go grab a coffee. + +After download and conversion, your `data` folder should contain a directory called `google_speech_recognition_v{1/2}`. +Inside this directory, there should be multiple subdirectory containing wav files, and three json manifest files: + +* `train_manifest.json` +* `validation_manifest.json` +* `test_manifest.json` + +Each line in json file describes a training sample - `audio_filepath` contains path to the wav file, `duration` it's duration in seconds, and `label` is the class label: + +.. code-block:: json + + {"audio_filepath": "/two/8aa35b0c_nohash_0.wav", "duration": 1.0, "command": "two"} + {"audio_filepath": "/two/ec5ab5d5_nohash_2.wav", "duration": 1.0, "command": "two"} + + +Training +--------- + +We will be training a QuartzNet model :cite:`speech-recognition-tut-kriman2019quartznet`. 
+The benefit of QuartzNet over JASPER models is that they use Separable Convolutions, which greatly reduce the number of +parameters required to get good model accuracy. + +QuartzNet models generally follow the model definition pattern QuartzNet-[BxR], where B is the number of blocks and R is the number of +convolutional sub-blocks. Each sub-block contains a 1-D masked convolution, batch normalization, ReLU, and dropout: + + .. image:: quartz_vertical.png + :align: center + :alt: quartznet model + +In the tutorial we will be using model QuartzNet [3x1]. +The script below does both training and evaluation (on V1 dataset) on single GPU: + + .. tip:: + Run Jupyter notebook and walk through this script step-by-step + + +**Training script** + +.. code-block:: python + + # Import some utility functions + import argparse + import copy + import math + import os + import glob + from functools import partial + from datetime import datetime + from ruamel.yaml import YAML + + # NeMo's "core" package + import nemo + # NeMo's ASR collection + import nemo.collections.asr as nemo_asr + # NeMo's learning rate policy + from nemo.utils.lr_policies import CosineAnnealing + from nemo.collections.asr.helpers import ( + monitor_classification_training_progress, + process_classification_evaluation_batch, + process_classification_evaluation_epoch, + ) + + from nemo.utils import logging + + # Lets define some hyper parameters + lr = 0.05 + num_epochs = 100 + batch_size = 128 + weight_decay = 0.001 + + # Create a Neural Factory + # It creates log files and tensorboard writers for us among other functions + neural_factory = nemo.core.NeuralModuleFactory( + log_dir='./quartznet-3x1-v1', + create_tb_writer=True) + tb_writer = neural_factory.tb_writer + + # Path to our training manifest + train_dataset = "/train_manifest.json" + + # Path to our validation manifest + eval_datasets = "/test_manifest.json" + + # Here we will be using separable convolutions + # with 3 blocks (k=3 repeated once r=1 from the picture above) + yaml = YAML(typ="safe") + with open("/examples/asr/configs/quartznet_speech_commands_3x1_v1.yaml") as f: + jasper_params = yaml.load(f) + + # Pre-define a set of labels that this model must learn to predict + labels = jasper_params['labels'] + + # Get the sampling rate of the data + sample_rate = jasper_params['sample_rate'] + + # Check if data augmentation such as white noise and time shift augmentation should be used + audio_augmentor = jasper_params.get('AudioAugmentor', None) + + # Build the input data layer and the preprocessing layers for the train set + train_data_layer = nemo_asr.AudioToSpeechLabelDataLayer( + manifest_filepath=train_dataset, + labels=labels, + sample_rate=sample_rate, + batch_size=batch_size, + num_workers=os.cpu_count(), + augmentor=audio_augmentor, + shuffle=True + ) + + # Build the input data layer and the preprocessing layers for the test set + eval_data_layer = nemo_asr.AudioToSpeechLabelDataLayer( + manifest_filepath=eval_datasets, + sample_rate=sample_rate, + labels=labels, + batch_size=args.eval_batch_size, + num_workers=os.cpu_count(), + shuffle=False, + ) + + # We will convert the raw audio data into MFCC Features to feed as input to our model + data_preprocessor = nemo_asr.AudioToMFCCPreprocessor( + sample_rate=sample_rate, **jasper_params["AudioToMFCCPreprocessor"], + ) + + # Compute the total number of samples and the number of training steps per epoch + N = len(train_data_layer) + steps_per_epoch = math.ceil(N / float(args.batch_size)) + + logging.info("Steps per 
epoch : {0}".format(steps_per_epoch)) + logging.info('Have {0} examples to train on.'.format(N)) + + # Here we begin defining all of the augmentations we want + # We will pad the preprocessed spectrogram image to have a certain number of timesteps + # This centers the generated spectrogram and adds black boundaries to either side + # of the padded image. + crop_pad_augmentation = nemo_asr.CropOrPadSpectrogramAugmentation(audio_length=128) + + # We also optionally add `SpecAugment` augmentations based on the config file + # SpecAugment has various possible augmentations to the generated spectrogram + # 1) Frequency band masking + # 2) Time band masking + # 3) Rectangular cutout + spectr_augment_config = jasper_params.get('SpectrogramAugmentation', None) + if spectr_augment_config: + data_spectr_augmentation = nemo_asr.SpectrogramAugmentation(**spectr_augment_config) + + # Build the QuartzNet Encoder model + # The config defines the layers as a list of dictionaries + # The first and last two blocks are not considered when we say QuartzNet-[BxR] + # B is counted as the number of blocks after the first layer and before the penultimate layer. + # R is defined as the number of repetitions of each block in B. + # Note: We can scale the convolution kernels size by the float parameter `kernel_size_factor` + jasper_encoder = nemo_asr.JasperEncoder(**jasper_params["JasperEncoder"]) + + # We then define the QuartzNet decoder. + # This decoder head is specialized for the task for classification, such that it + # accepts a set of `N-feat` per timestep of the model, and averages these features + # over all the timesteps, before passing a Linear classification layer on those features. + jasper_decoder = nemo_asr.JasperDecoderForClassification( + feat_in=jasper_params["JasperEncoder"]["jasper"][-1]["filters"], + num_classes=len(labels), + **jasper_params['JasperDecoderForClassification'], + ) + + # We can easily apply cross entropy loss to train this model + ce_loss = nemo_asr.CrossEntropyLossNM() + + # Lets print out the number of parameters of this model + logging.info('================================') + logging.info(f"Number of parameters in encoder: {jasper_encoder.num_weights}") + logging.info(f"Number of parameters in decoder: {jasper_decoder.num_weights}") + logging.info( + f"Total number of parameters in model: " f"{jasper_decoder.num_weights + jasper_encoder.num_weights}" + ) + logging.info('================================') + + # Now we have all of the components that are required to build the NeMo execution graph! 
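+    # Note: the calls below do not execute any computation yet. Each neural module call
+    # returns tensor handles that are wired together into NeMo's execution graph; the
+    # graph is only run later, when neural_factory.train() (or infer()) is invoked.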
+ ## Build the training data loaders and preprocessors first + audio_signal, audio_signal_len, commands, command_len = train_data_layer() + processed_signal, processed_signal_len = data_preprocessor(input_signal=audio_signal, length=audio_signal_len) + processed_signal, processed_signal_len = crop_pad_augmentation( + input_signal=processed_signal, + length=audio_signal_len + ) + + ## Augment the dataset for training + if spectr_augment_config: + processed_signal = data_spectr_augmentation(input_spec=processed_signal) + + ## Define the model + encoded, encoded_len = jasper_encoder(audio_signal=processed_signal, length=processed_signal_len) + decoded = jasper_decoder(encoder_output=encoded) + + ## Obtain the train loss + train_loss = ce_loss(logits=decoded, labels=commands) + + # Now we build the test graph in a similar way, reusing the above components + ## Build the test data loader and preprocess same way as train graph + ## But note, we do not add the spectrogram augmentation to the test graph ! + test_audio_signal, test_audio_signal_len, test_commands, test_command_len = eval_data_layer() + test_processed_signal, test_processed_signal_len = data_preprocessor( + input_signal=test_audio_signal, length=test_audio_signal_len + ) + test_processed_signal, test_processed_signal_len = crop_pad_augmentation( + input_signal=test_processed_signal, length=test_processed_signal_len + ) + + # Pass the test data through the model encoder and decoder + test_encoded, test_encoded_len = jasper_encoder( + audio_signal=test_processed_signal, length=test_processed_signal_len + ) + test_decoded = jasper_decoder(encoder_output=test_encoded) + + # Compute test loss for visualization + test_loss = ce_loss(logits=test_decoded, labels=test_commands) + + # Now that we have our training and evaluation graphs built, + # we can focus on a few callbacks to help us save the model checkpoints + # during training, as well as display train and test metrics + + # Callbacks needed to print train info to console and Tensorboard + train_callback = nemo.core.SimpleLossLoggerCallback( + # Notice that we pass in loss, predictions, and the labels. + # Of course we would like to see our training loss, but we need the + # other arguments to calculate the accuracy. + tensors=[train_loss, decoded, commands], + # The print_func defines what gets printed. 
+ print_func=partial(monitor_classification_training_progress, eval_metric=None), + get_tb_values=lambda x: [("loss", x[0])], + tb_writer=neural_factory.tb_writer, + ) + + # Callbacks needed to print test info to console and Tensorboard + tagname = 'TestSet' + eval_callback = nemo.core.EvaluatorCallback( + eval_tensors=[test_loss, test_decoded, test_commands], + user_iter_callback=partial(process_classification_evaluation_batch, top_k=1), + user_epochs_done_callback=partial(process_classification_evaluation_epoch, eval_metric=1, tag=tagname), + eval_step=200, # How often we evaluate the model on the test set + tb_writer=neural_factory.tb_writer, + ) + + # Callback to save model checkpoints + chpt_callback = nemo.core.CheckpointCallback( + folder=neural_factory.checkpoint_dir, + step_freq=1000, + ) + + # Prepare a list of checkpoints to pass to the engine + callbacks = [train_callback, eval_callback, chpt_callback] + + # Now we have all the components required to train the model + # Lets define a learning rate schedule + + # Define a learning rate schedule + lr_policy = CosineAnnealing( + total_steps=num_epochs * steps_per_epoch, + warmup_ratio=0.05, + min_lr=0.001, + ) + + logging.info(f"Using `{lr_policy}` Learning Rate Scheduler") + + # Finally, lets train this model ! + neural_factory.train( + tensors_to_optimize=[train_loss], + callbacks=callbacks, + lr_policy=lr_policy, + optimizer="novograd", + optimization_params={ + "num_epochs": num_epochs, + "max_steps": None, + "lr": lr, + "momentum": 0.95, + "betas": (0.98, 0.5), + "weight_decay": weight_decay, + "grad_norm_clip": None, + }, + batches_per_step=1, + ) + +.. note:: + This script trains should finish 100 epochs in about 4-5 hours on GTX 1080. + +.. tip:: + To improve your accuracy: + (1) Train longer (200-300 epochs) + (2) Train on more data (try increasing the augmentation parameters for SpectrogramAugmentation) + (3) Use larger model + (4) Train on several GPUs and use mixed precision (on NVIDIA Volta and Turing GPUs) + (5) Start with pre-trained checkpoints + + +Mixed Precision training +------------------------- +Mixed precision and distributed training in NeMo is based on `NVIDIA's APEX library `_. +Make sure it is installed prior to attempting mixed precision training. + +To train with mixed-precision all you need is to set `optimization_level` parameter of `nemo.core.NeuralModuleFactory` to `nemo.core.Optimization.mxprO1`. For example: + +.. code-block:: python + + nf = nemo.core.NeuralModuleFactory( + backend=nemo.core.Backend.PyTorch, + local_rank=args.local_rank, + optimization_level=nemo.core.Optimization.mxprO1, + placement=nemo.core.DeviceType.AllGpu, + cudnn_benchmark=True) + + +Multi-GPU training +------------------- + +Enabling multi-GPU training with NeMo is easy: + + (1) First set `placement` to `nemo.core.DeviceType.AllGpu` in NeuralModuleFactory and in your Neural Modules + (2) Have your script accept 'local_rank' argument and do not set it yourself: `parser.add_argument("--local_rank", default=None, type=int)` + (3) Use `torch.distributed.launch` package to run your script like this (replace with number of gpus): + +.. code-block:: bash + + python -m torch.distributed.launch --nproc_per_node= /examples/asr/quartznet_speech_commands.py ... + +.. note:: + Because mixed precision requires Tensor Cores it only works on NVIDIA Volta and Turing based GPUs + +Large Training Example +~~~~~~~~~~~~~~~~~~~~~~ + +Please refer to the `/examples/asr/quartznet_speech_commands.py` for comprehensive example. 
+It builds one train DAG, one validation DAG and a test DAG to evaluate on different datasets. + +Assuming, you are working with Volta-based DGX, you can run training like this: + +.. code-block:: bash + + python -m torch.distributed.launch --nproc_per_node= /examples/asr/quartznet_speech_commands.py --model_config "/examples/asr/configs/quartznet_speech_commands_3x1_v1.yaml" \ + --train_dataset="/train_manifest.json" --eval_datasets "/validation_manifest.json" "/test_manifest.json" \ + --num_epochs=200 --batch_size=128 --eval_batch_size=128 --eval_freq=200 --lr=0.05 --min_lr=0.001 \ + --optimizer="novograd" --weight_decay=0.001 --amp_opt_level="O1" --warmup_ratio=0.05 --hold_ratio=0.45 \ + --checkpoint_dir="./checkpoints/quartznet_speech_commands_checkpoints_3x1_v1/" \ + --exp_name="./results/quartznet_speech_classification-quartznet-3x1_v1/" + +The command above should trigger 8-GPU training with mixed precision. In the command above various manifests (.json) files are various datasets. Substitute them with the ones containing your data. + +.. tip:: + You can pass several manifests (comma-separated) to train on a combined dataset like this: `--train_manifest=/manifests/.json,/manifests/.json` + + +Fine-tuning +----------- +Training time can be dramatically reduced if starting from a good pre-trained model: + + (1) Obtain pre-trained model (jasper_encoder, jasper_decoder and configuration files). + (2) load pre-trained weights right after you've instantiated your jasper_encoder and jasper_decoder, like this: + +.. code-block:: python + + jasper_encoder.restore_from("/JasperEncoder-STEP-89000.pt") + jasper_decoder.restore_from("/JasperDecoderForClassification-STEP-89000.pt") + # in case of distributed training add args.local_rank + jasper_decoder.restore_from("/JasperDecoderForClassification-STEP-89000.pt", args.local_rank) + +.. tip:: + When fine-tuning, use smaller learning rate. + + +Evaluation +---------- + +First download pre-trained model (jasper_encoder, jasper_decoder and configuration files) into ``. +We will use this pre-trained model to measure classification accuracy on Google Speech Commands dataset v1, +but they can similarly be used for v2 dataset. + +.. note:: + To listen to the samples that were incorrectly labeled by the model, please run the following code in a notebook. + +.. code-block:: python + + # Lets add some generic imports. + # Please note that you will need to install `librosa` for this code + # To install librosa : Run `!pip install librosa` from the notebook itself. + import glob + import os + import json + import re + import numpy as np + import torch + import librosa + import librosa.display + import matplotlib.pyplot as plt + import IPython.display as ipd + from ruamel.yaml import YAML + + # Import nemo and asr collections + import nemo + import nemo.collections.asr as nemo_asr + + from nemo.utils import logging + + # We add some + data_dir = '' + data_version = 1 + config_path = '' + model_path = '' + + test_manifest = os.path.join(data_dir, "test_manifest.json") + + # Parse the config file provided to us + # Parse config and pass to model building function + yaml = YAML(typ='safe') + with open(config_path) as f: + params = yaml.load(f) + logging.info("******\nLoaded config file.\n******") + + labels = params['labels'] # Vocab of tokens + sample_rate = params['sample_rate'] + batch_size = 128 + + # Build the evaluation graph + # Create our NeuralModuleFactory, which will oversee the neural modules. 
+ neural_factory = nemo.core.NeuralModuleFactory( + log_dir=f'v{data_version}/eval_results/') + + logger = neural_factory.logger + + test_data_layer = nemo_asr.AudioToSpeechLabelDataLayer( + manifest_filepath=test_manifest, + labels=labels, + sample_rate=sample_rate, + shuffle=False, + batch_size=batch_size, + ) + crop_pad_augmentation = nemo_asr.CropOrPadSpectrogramAugmentation( + audio_length=128 + ) + data_preprocessor = nemo_asr.AudioToMFCCPreprocessor( + sample_rate=sample_rate, + **params['AudioToMFCCPreprocessor'] + ) + + # Create the Jasper_3x1 encoder as specified, and a classification decoder + encoder = nemo_asr.JasperEncoder(**params['JasperEncoder']) + decoder = nemo_asr.JasperDecoderForClassification( + feat_in=params['JasperEncoder']['jasper'][-1]['filters'], + num_classes=len(labels), + **params['JasperDecoderForClassification'] + ) + + ce_loss = nemo_asr.CrossEntropyLossNM() + + # Assemble the DAG components + test_audio_signal, test_audio_signal_len, test_commands, test_command_len = test_data_layer() + + test_processed_signal, test_processed_signal_len = data_preprocessor( + input_signal=test_audio_signal, + length=test_audio_signal_len + ) + + # --- Crop And Pad Augment --- # + test_processed_signal, test_processed_signal_len = crop_pad_augmentation( + input_signal=test_processed_signal, + length=test_processed_signal_len + ) + + test_encoded, test_encoded_len = encoder( + audio_signal=test_processed_signal, + length=test_processed_signal_len + ) + + test_decoded = decoder( + encoder_output=test_encoded + ) + + test_loss = ce_loss( + logits=test_decoded, + labels=test_commands + ) + + # We import the classification accuracy metric to compute Top-1 accuracy + from nemo.collections.asr.metrics import classification_accuracy + from functools import partial + + # --- Inference Only --- # + # We've already built the inference DAG above, so all we need is to call infer(). + evaluated_tensors = neural_factory.infer( + # These are the tensors we want to get from the model. + tensors=[test_loss, test_decoded, test_commands], + # checkpoint_dir specifies where the model params are loaded from. + checkpoint_dir=model_path + ) + + # Let us count the total number of incorrect classifications by this model + correct_count = 0 + total_count = 0 + + for batch_idx, (logits, labels) in enumerate(zip(evaluated_tensors[1], evaluated_tensors[2])): + acc = classification_accuracy( + logits=logits, + targets=labels, + top_k=[1] + ) + + # Select top 1 accuracy only + acc = acc[0] + + # Since accuracy here is "per batch", we simply denormalize it by multiplying + # by batch size to recover the count of correct samples. 
+ correct_count += int(acc * logits.size(0)) + total_count += logits.size(0) + + logging.info(f"Total correct / Total count : {correct_count} / {total_count}") + logging.info(f"Final accuracy : {correct_count / float(total_count)}") + + # Let us now filter out the incorrectly labeled samples from the total set of samples in the test set + + # First lets create a utility class to remap the integer class labels to actual string label + class ReverseMapLabel: + def __init__(self, data_layer: nemo_asr.AudioToSpeechLabelDataLayer): + self.label2id = dict(data_layer._dataset.label2id) + self.id2label = dict(data_layer._dataset.id2label) + + def __call__(self, pred_idx, label_idx): + return self.id2label[pred_idx], self.id2label[label_idx] + + # Next, lets get the indices of all the incorrectly labeled samples + sample_idx = 0 + incorrect_preds = [] + rev_map = ReverseMapLabel(test_data_layer) + + for batch_idx, (logits, labels) in enumerate(zip(evaluated_tensors[1], evaluated_tensors[2])): + probs = torch.softmax(logits, dim=-1) + probas, preds = torch.max(probs, dim=-1) + + incorrect_ids = (preds != labels).nonzero() + for idx in incorrect_ids: + proba = float(probas[idx][0]) + pred = int(preds[idx][0]) + label = int(labels[idx][0]) + idx = int(idx[0]) + sample_idx + + incorrect_preds.append((idx, *rev_map(pred, label), proba)) + + sample_idx += labels.size(0) + + logging.info(f"Num test samples : {total_count}") + logging.info(f"Num errors : {len(incorrect_preds)}") + + # First lets sort by confidence of prediction + incorrect_preds = sorted(incorrect_preds, key=lambda x: x[-1], reverse=False) + + # Lets print out the (test id, predicted label, ground truth label, confidence) + # tuple of first 20 incorrectly labeled samples + for incorrect_sample in incorrect_preds[:20]: + logging.info(str(incorrect_sample)) + + # Lets define a threshold below which we designate a model's prediction as "low confidence" + # and then filter out how many such samples exist + low_confidence_threshold = 0.25 + count_low_confidence = len(list(filter(lambda x: x[-1] <= low_confidence_threshold, incorrect_preds))) + logging.info(f"Number of low confidence predictions : {count_low_confidence}") + + # One interesting observation is to actually listen to these samples whose predicted labels were incorrect + # Note: The following requires the use of a Notebook environment + + # First lets create a helper function to parse the manifest files + def parse_manifest(manifest): + data = [] + for line in manifest: + line = json.loads(line) + data.append(line) + + return data + + # Now lets load the test manifest into memory + test_samples = [] + with open(test_manifest, 'r') as test_f: + test_samples = test_f.readlines() + + test_samples = parse_manifest(test_samples) + + # Next, lets create a helper function to actually listen to certain samples + def listen_to_file(sample_id, pred=None, label=None, proba=None): + # Load the audio waveform using librosa + filepath = test_samples[sample_id]['audio_filepath'] + audio, sample_rate = librosa.load(filepath) + + if pred is not None and label is not None and proba is not None: + logging.info(f"Sample : {sample_id} Prediction : {pred} Label : {label} Confidence = {proba: 0.4f}") + else: + logging.info(f"Sample : {sample_id}") + + return ipd.Audio(audio, rate=sample_rate) + + # Finally, lets listen to all the audio samples where the model made a mistake + # Note: This list of incorrect samples may be quite large, so you may choose to subsample `incorrect_preds` + for sample_id, pred, 
label, proba in incorrect_preds: + ipd.display(listen_to_file(sample_id, pred=pred, label=label, proba=proba)) # Needs to be run in a notebook environment + +References +---------- + +.. bibliography:: speech_recognition_all.bib + :style: plain + :labelprefix: SPEECH-RECOGNITION-ALL-TUT + :keyprefix: speech-recognition-tut- diff --git a/docs/sources/source/training.rst b/docs/sources/source/training.rst index ef60e4aa1f44..905d59c91324 100644 --- a/docs/sources/source/training.rst +++ b/docs/sources/source/training.rst @@ -17,18 +17,16 @@ To enable mixed-precision in NeMo all you need to do is to set `optimization_lev .. important:: Mixed precision requires Tensor Cores, so it works only on NVIDIA Volta and Turing GPUs. -Multi-GPU training +Multi-GPU Training ~~~~~~~~~~~~~~~~~~ For multi-GPU training: -(1) Set `placement` to `nemo.core.DeviceType.AllGpu` in NeuralModuleFactory -(2) Add 'local_rank' argument to your script and do not set it yourself: `parser.add_argument("--local_rank", default=None, type=int)` +Add 'local_rank' argument to your script and do not set it yourself: `parser.add_argument("--local_rank", default=os.getenv('LOCAL_RANK', None), type=int)` .. code-block:: python nf = nemo.core.NeuralModuleFactory( - placement=nemo.core.DeviceType.AllGpu, local_rank=args.local_rank) @@ -36,8 +34,7 @@ Use `torch.distributed.launch` package to run your script like this (assuming 8 .. code-block:: bash - python -m torch.distributed.launch --nproc_per_node=8 /examples/asr/jasper.py --num_gpus=8 ... - + python -m torch.distributed.launch --nproc_per_node=8 /examples/asr/jasper.py ... Example ~~~~~~~ @@ -49,7 +46,7 @@ If you are working with a Volta-based DGX, you can run training like this: .. code-block:: bash - python -m torch.distributed.launch --nproc_per_node=8 /examples/asr/jasper.py --batch_size=64 --num_gpus=8 --num_epochs=100 --lr=0.015 --warmup_steps=8000 --weight_decay=0.001 --train_manifest=/manifests/librivox-train-all.json --val_manifest1=/manifests/librivox-dev-clean.json --val_manifest2=/manifests/librivox-dev-other.json --model_config=/nemo/examples/asr/configs/jasper15x5SEP.yaml --exp_name=MyLARGE-ASR-EXPERIMENT + python -m torch.distributed.launch --nproc_per_node=8 /examples/asr/jasper.py --batch_size=64 --num_epochs=100 --lr=0.015 --warmup_steps=8000 --weight_decay=0.001 --train_manifest=/manifests/librivox-train-all.json --val_manifest1=/manifests/librivox-dev-clean.json --val_manifest2=/manifests/librivox-dev-other.json --model_config=/nemo/examples/asr/configs/jasper15x5SEP.yaml --exp_name=MyLARGE-ASR-EXPERIMENT The command above should trigger 8-GPU training with mixed precision. In the command above various manifests (.json) files are various datasets. Substitute them with the ones containing your data. @@ -58,4 +55,35 @@ The command above should trigger 8-GPU training with mixed precision. In the com This example would train on 3 data sets: LibriSpeech, Mozilla Common Voice and LibriSpeech speed perturbed. +Multi-Node Training +~~~~~~~~~~~~~~~~~~~ +We highly recommend reading pytorch's distributed documentation prior to trying multi-node, but here is a quick start +guide on how to setup multi-node training using TCP initialization. Assume that we have 2 machines each with 4 gpus +each. Let's call machine 1 the master node. We need the IP address of the master node and a free port on the master +node. On machine 1, we run + +.. 
code-block:: bash + + python -m torch.distributed.launch --nproc_per_node=4 --nnodes=2 --node_rank=0 --master_addr= --master_port= jasper.py ... + +On machine 2, we run + +.. code-block:: bash + + python -m torch.distributed.launch --nproc_per_node=4 --nnodes=2 --node_rank=1 --master_addr= --master_port= jasper.py ... + +.. tip:: + Setting the environment variable NCCL_DEBUG to INFO can help identify setup issues. + +.. tip:: + We recommend reading the following pytorch documentation: + https://pytorch.org/docs/stable/distributed.html#launch-utility + https://github.com/pytorch/pytorch/blob/master/torch/distributed/launch.py + +.. tip:: + To help with multi-processing, neural_factory contains two attributes: ``local_rank`` and ``global_rank``. + ``local_rank`` refers to the rank on the current machine whereas ``global_rank`` refers to the rank across all + machines. For example, assume you have 2 machines each with 4 gpus. global_rank 0 will have local_rank 0 and have + the 1st gpu on machine 1, whereas global_rank 5 COULD have local_rank 0 and have the 1st gpu on machine 2. In other + words, local_rank == 0 and global_rank == 0 ensures that the process has the 1st GPU on the master node, and local_rank == 0 + and global_rank != 0 ensures that the process has the 1st GPU on slave nodes. diff --git a/docs/sources/source/tts/fastspeech.rst b/docs/sources/source/tts/fastspeech.rst new file mode 100644 index 000000000000..c2e5f9ebb9b0 --- /dev/null +++ b/docs/sources/source/tts/fastspeech.rst @@ -0,0 +1,39 @@ +.. _fastspeech: + +Fast Speech +=========== + +Model +----- +This model is based on the +`Fast Speech model `_ +(see also `paper `_). + +Fast Speech operates in two distinct stages: durations extraction and actual training. + +Durations Extraction +++++++++++++++++++++ + +First, for each input dataset char, you should obtain an integer duration, which corresponds to the number of steps this +char lasts in the audio sample. For that, NeMo uses the alignment map from Tacotron 2 inference, teacher forced with the ground +truth mel spectrogram for shape matching. For each timestep, we add +1 duration to the char with the strongest alignment signal +in the alignment map. + +To do that, run fastspeech_durations.py from NeMo/examples/tts with the following arguments (provide the path to the durations +dir to save to): + +.. code-block:: bash + + python fastspeech_durations.py --spec_model=tacotron2 --spec_model_config=configs/tacotron2.yaml --spec_model_load_dir= --eval_dataset=/ljspeech_train.json --durations_dir=/durs + +Fast Speech Training +++++++++++++++++++++ + +The second stage is the actual model training. NeMo wraps all Fast Speech mel generation and duration calculation logic in one +neural model with the corresponding name. FastSpeechLoss then uses its output to calculate a two-term loss value. + +To begin training with the LJSpeech data and the durations obtained in the previous step, run this: + +.. 
code-block:: bash + + python fastspeech.py --model_config=configs/fastspeech.yaml --train_dataset=/ljspeech_train.json --durations_dir=/durs diff --git a/docs/sources/source/tts/models.rst b/docs/sources/source/tts/models.rst index f3636a4f4669..0218ddb6868c 100644 --- a/docs/sources/source/tts/models.rst +++ b/docs/sources/source/tts/models.rst @@ -6,3 +6,4 @@ Models tacotron2 waveglow + fastspeech diff --git a/docs/sources/source/tts/tutorial.rst b/docs/sources/source/tts/tutorial.rst index 94691bf9d72a..67042b667031 100644 --- a/docs/sources/source/tts/tutorial.rst +++ b/docs/sources/source/tts/tutorial.rst @@ -70,15 +70,15 @@ you can run the following to start training: .. code-block:: bash - python tacotron2.py --train_dataset=/ljspeech_train.json --eval_datasets /ljspeech_eval.json --model_config=configs/tacotron.yaml --max_steps=30000 + python tacotron2.py --train_dataset=/ljspeech_train.json --eval_datasets /ljspeech_eval.json --model_config=configs/tacotron2.yaml --max_steps=30000 Training Tacotron 2 on Mandarin also can be done by running the `tacotron2.py` file. You can run the following to start training: .. code-block:: bash - python tacotron2.py --train_dataset=/databaker_csmsc_train.json --eval_datasets /databaker_csmsc_eval.json --model_config=configs/tacotron_mandarin.yaml --max_steps=30000 - + python tacotron2.py --train_dataset=/databaker_csmsc_train.json --eval_datasets /databaker_csmsc_eval.json --model_config=configs/tacotron2_mandarin.yaml --max_steps=30000 + .. tip:: Tacotron 2 normally takes around 20,000 steps for attention to be learned. Once attention is learned, this is when you can use the model to generate @@ -87,8 +87,8 @@ You can run the following to start training: Mixed Precision training ------------------------- Enabling or disabling mixed precision training can be changed through a command -line argument --amp_opt_level. Recommended and default values for Tacotron 2 -and Waveglow are O1. It can be: +line argument ``--amp_opt_level``. Recommended and default values for Tacotron 2 are O0, +whereas values for Waveglow are O1. Options for amp_opt_level are: - O0: float32 training - O1: mixed precision training @@ -115,8 +115,9 @@ torch.distributed.launch module and sepcifying the num_gpus as the Inference --------- You can now to inference with either your own trained Tacotron 2, or you can -use our pre-trained Tacotron 2 model. `Please download our -pretrained model here `_. +use our pre-trained Tacotron 2 model. Please download our models for +`tacotron 2 here `_ and +`waveglow here `_. Next create the texts that you want to generate and add them to a json like the training dataset. They should have lines like so: @@ -142,4 +143,9 @@ NeMo/examples/tts folder like so: python tts_infer.py --spec_model=tacotron2 --spec_model_config=configs/tacotron2.yaml --spec_model_load_dir= --vocoder=waveglow --vocoder_model_config=configs/waveglow.yaml --vocoder_model_load_dir= --save_dir= --eval_dataset -For Mandarin, remember to replace the config file of Tacotron 2 with tacotron2_mandarin.yaml. \ No newline at end of file +For Mandarin, remember to replace the config file of Tacotron 2 with tacotron2_mandarin.yaml. + +.. tip:: + The inference via waveglow can be further controlled via the ``--waveglow_denoiser_strength`` and + ``--waveglow_sigma`` arguments. If there is a stuble white noise to the audio, we recommend slowing increasing + ``--waveglow_denoiser_strength`` from 0. 
diff --git a/docs/sources/source/tutorials/callbacks.rst b/docs/sources/source/tutorials/callbacks.rst index c68fe4f48641..e63146cb901d 100644 --- a/docs/sources/source/tutorials/callbacks.rst +++ b/docs/sources/source/tutorials/callbacks.rst @@ -1,7 +1,7 @@ -Callbacks -========= +Old Callbacks +============= NeMo uses callbacks to do a variety of helper functions during training. -NeMo comes with three useful callbacks: SimpleLossLoggerCallback, +NeMo comes with three useful callbacks: SimpleLossLoggerCallback, CheckpointCallback, and EvaluatorCallback. Callbacks are defined prior to calling the train() function, and are passed to the train() function. For example, a common training script will look like: @@ -146,4 +146,26 @@ for tb_writer_func to consume. The user must log all data of interest inside tb_writer_func including scalars that would otherwise be logged if tb_writer_func was not passed to EvaluatorCallback. -For an example, please see the scripts inside /examples. \ No newline at end of file +You can also log your evaluation metrics into Weights & Biases experiment trackers. +To do so, please setup these parameters. Also make sure wandb is installed and you did ``wandb login``. + +- wandb_name: W&B experiment name +- wandb_project: W&B project name + +For an example, please see the scripts inside /examples. + +WandbCallback +----------------- +WandbCallback logs losses and metrics to `Weights & Biases `_. +Make sure wandb is installed and you did ``wandb login``. + +This is a light-weight callback to log **training** metrics into Weights & Biases. +To log evaluation metrics, see Evaluator Callback above. + +It requires following arguments: + +- train_tensors: list of tensors to evaluate and log based on training batches +- wandb_name: W&B experiment name +- wandb_project: W&B project name +- args: argparse flags - will be logged as hyper parameters for your run +- update_freq: frequency with which to log updates diff --git a/docs/sources/source/tutorials/intro.rst b/docs/sources/source/tutorials/intro.rst index e1758354238f..be51895dffc0 100644 --- a/docs/sources/source/tutorials/intro.rst +++ b/docs/sources/source/tutorials/intro.rst @@ -8,6 +8,8 @@ Getting started examples neuraltypes custommodules + module_configuration weightsharing callbacks complex_training + neural_graphs diff --git a/docs/sources/source/tutorials/module_configuration.rst b/docs/sources/source/tutorials/module_configuration.rst new file mode 100644 index 000000000000..b009e3edca75 --- /dev/null +++ b/docs/sources/source/tutorials/module_configuration.rst @@ -0,0 +1,59 @@ +Module Configuration +==================== + +Neural Modules have configuration that can be imported from/exported to YAML file. \ +A module configuration file stores all parameters required for creation of an instance. + +.. note:: + In the case of Trainable Neural Modules the `configuration` is complementary to checkpoint, i.e. \ + configuration contains parameters (like e.g. number of layers, hidden size etc.), \ + whereas checkpoint contains the actual module weights. + + +Exporting the configuration +--------------------------- + +In the following example we will once again train a model to learn Taylor's coefficients for y=sin(x). \ +However, we will extend the example by showing how to export configuration of the module to a YAML file and \ +create a second instance having the same set of parameters. + +Let us start by creating the :class:`NeuralModuleFactory` object and instatiating the modules from the original example: + +.. 
literalinclude:: ../../../../examples/start_here/module_configuration.py + :language: python + :lines: 24-34 + +Now we can export the configuration of any of the existing modules by using the :meth:`export_to_config()`, for \ +example we can export the configuration of the trainable :class:`TaylorNet` by calling: + +.. literalinclude:: ../../../../examples/start_here/module_configuration.py + :language: python + :lines: 37 + +Importing the configuration +--------------------------- + +There is an analogical function :meth:`import_from_config()` responsible for loading the configuration file: + +.. literalinclude:: ../../../../examples/start_here/module_configuration.py + :language: python + :lines: 40 + +.. note:: + The :meth:`import_from_config()` function actually creates a new instance of object of the class that was stored \ + in the configuration. But it is important to understand that both instances do not share any trainable weights. \ + NeMo offers a separate mechanism for weight tying. + +Now we can use the newly imported module in the same way as every other module. \ +For example, we can build a graph and train it with a NeMo trainer: + +.. literalinclude:: ../../../../examples/start_here/module_configuration.py + :language: python + :lines: 42- + + +.. include:: module_custom_configuration.rst + + +.. note:: + The above (along with many other) examples can be found in the `nemo/examples` folder diff --git a/docs/sources/source/tutorials/module_custom_configuration.rst b/docs/sources/source/tutorials/module_custom_configuration.rst new file mode 100644 index 000000000000..8c368ea0b4b1 --- /dev/null +++ b/docs/sources/source/tutorials/module_custom_configuration.rst @@ -0,0 +1,73 @@ +Customizing the configuration +----------------------------- + + +A generic configuration export enables to use of parameters of primitive types (string, int, float) \ +or nested lists of/dicts of primitive types. + +In order to extend that functionality by other, custom types one must overload the \ +generic :meth:`export_to_config()` and :meth:`import_from_config()` methods for his/her Module class. \ +This tutorial explains how one can do it. + + +In the following example we will derive a class from the :class:`TaylorNet` (used in the previous example) \ +and extend it by those methods. But first, let us define a simple :class:`Status` enum: + +.. literalinclude:: ../../../../examples/start_here/module_custom_configuration.py + :language: python + :lines: 28-30 + +Now let us define the :class:`CustomTaylorNet` Neural Module class: + +.. literalinclude:: ../../../../examples/start_here/module_custom_configuration.py + :language: python + :lines: 33-38 + + +In order to properly handle the export of the :class:`Status` enum we must implement a custom function \ +:meth:`_serialize_configuration()`: + +.. literalinclude:: ../../../../examples/start_here/module_custom_configuration.py + :language: python + :lines: 49-61 + + +Note that the configuration is actually a dictionary consisting of two sections: + + * ``header`` (storing class specification, NeMo version, NeMo collection name etc.) and + * ``init_params`` storing the parameters used for instantiation of the object. + +Those parameters are stored in the protected ``self._init_params`` field of the base :class:`NeuralModule` class. +It is assumed that (aside of this use-case) the user won't access nor use them directly. + +Analogically, we must overload the :meth:`_deserialize_configuration()` method: + +.. 
literalinclude:: ../../../../examples/start_here/module_custom_configuration.py + :language: python + :lines: 63-86 + +.. note:: + It is worth emphasizing that the :meth:`_deserialize_configuration()` is a class method, + analogically to public :meth:`import_from_config()` and :meth:`deserialize()` methods + that return a new object instance - in this case of the hardcoded :class:`CustomTaylorNet` type. + + +Now we can simply create an instance and export its configuration by calling: + +.. literalinclude:: ../../../../examples/start_here/module_custom_configuration.py + :language: python + :lines: 95-96,101-102 + +And instantiate a second by loading that configuration: + +.. literalinclude:: ../../../../examples/start_here/module_custom_configuration.py + :language: python + :lines: 104-106 + +As a result we will see that the new object has set the status to the same value as the original one: + +.. code-block:: bash + + [NeMo I 2020-02-18 20:15:50 module_custom_configuration:74] Configuration of module 3ec99d30-baba-4e4c-a62b-e91268762864 (CustomTaylorNet) exported to /tmp/custom_taylor_net.yml + [NeMo I 2020-02-18 20:15:50 module_custom_configuration:41] Status: Status.error + [NeMo I 2020-02-18 20:15:50 module_custom_configuration:114] Instantiated a new Neural Module of type `CustomTaylorNet` using configuration loaded from the `/tmp/custom_taylor_net.yml` file diff --git a/docs/sources/source/tutorials/neural_graphs.rst b/docs/sources/source/tutorials/neural_graphs.rst new file mode 100644 index 000000000000..e8f363457237 --- /dev/null +++ b/docs/sources/source/tutorials/neural_graphs.rst @@ -0,0 +1,54 @@ +Neural Graphs +============= + +The Neural Graph is a high-level abstract concept empowering the user to build graphs consisting of many, +interconnected Neural Modules. +Once the user defines a graph, its topology is “frozen”, i.e. connections between modules cannot change. +If a user wants to change the topology - he/she can build another graph, potentially spanned over the same modules. +At the same time, he can reuse and nest one graph into another. + + +.. figure:: neural_graphs_general.png + +The import/export/save/restore options combined with the lightweight API make Neural Graphs +a perfect tool for rapid prototyping and experimentation. + +There are two Jupyter Notebook tutorials focusing on different aspects of the Neural Graphs functionality. + +Tutorial I: The basic functionality +----------------------------------- + +In this first part of the Neural Graphs (NGs) tutorial we will focus on a simple example: +training TaylorNet module to approximate a sine wave function. +We will build a simple "model graph" and show how we can nest it into another graphs. + + +.. figure:: neural_graphs_nesting.png + +This part covers the following: + * how to create a Neural Graph object + * how to activate/deactivate graph context (in various ways) + * how to bind NG inputs and outpus (in various ways) + * how to nest one graph (representing the our "trainable model") into training and validation graphs + + +Tutorial II: The advanced functionality +--------------------------------------- + +In this first part of the Neural Graphs (NGs) tutorial we will focus on a more complex example: +training of an End-to-End Convolutional Neural Acoustic Model called JASPER. +We will build a "model graph" and show how we can nest it into another graphs, how we can freeze/unfreeze modules, +use graph configuration and save/load graph checkpoints. 
+ +This part covers the following: + * how to nest one graph into another + * how to serialize and deserialize a graph + * how to export and import serialized graph configuration to/from YAML files + * how to save and load graph checkpoints (containing weights of the Trainable NMs) + * how to freeze/unfreeze modules in a graph + +Additionally, we will show how use `AppState` to list all the modules and graphs we have created in the scope of +our application. + +.. note:: + Both tutorial notebooks can be found in the `nemo/examples/neural_graphs` folder. diff --git a/docs/sources/source/tutorials/neural_graphs_general.png b/docs/sources/source/tutorials/neural_graphs_general.png new file mode 100644 index 000000000000..996e3db26e3d Binary files /dev/null and b/docs/sources/source/tutorials/neural_graphs_general.png differ diff --git a/docs/sources/source/tutorials/neural_graphs_nesting.png b/docs/sources/source/tutorials/neural_graphs_nesting.png new file mode 100644 index 000000000000..c411587714b8 Binary files /dev/null and b/docs/sources/source/tutorials/neural_graphs_nesting.png differ diff --git a/docs/sources/source/tutorials/neuraltypes.rst b/docs/sources/source/tutorials/neuraltypes.rst index 5620f3737c6f..ebcae6a1a235 100644 --- a/docs/sources/source/tutorials/neuraltypes.rst +++ b/docs/sources/source/tutorials/neuraltypes.rst @@ -1,63 +1,166 @@ Neural Types ============ -Neural Types are used to check input tensors to make sure that two neural modules are compatible, and catch -semantic and dimensionality errors. +Basics +~~~~~~ -Neural Types are implemented by :class:`NeuralType` class which is a mapping from Tensor's axis to :class:`AxisType`. +All input and output ports of every neural module in NeMo are typed. +The type system's goal is check compatibility of connected input/output port pairs. +The type system's constraints are checked when the user connects modules with each other and before any training or +inference is started. -:class:`AxisType` contains following information per axis: +Neural Types are implemented with the Python class :class:`NeuralType` and helper +classes derived from :class:`ElementType`, :class:`AxisType` and :class:`AxisKindAbstract`. -* Semantic Tag, which must inherit from :class:`BaseTag`, for example: :class:`BatchTag`, :class:`ChannelTag`, :class:`TimeTag`, etc. These tags can be related via `is-a` inheritance. -* Dimension: unsigned integer -* Descriptor: string +**A Neural Type contains two categories of information:** +* **axes** - represents what varying a particular axis means (e.g. batch, time, etc.) +* **elements_type** - represents the semantics and properties of what is stored inside the activations (audio signal,text embedding, logits, etc.) -To instantiate a NeuralType you should pass it a dictionary (axis2type) which will map axis to it's AxisType. -For example, a ResNet18 input and output ports can be described as: + +To instantiate a NeuralType you need to pass it the following arguments: `axes: Optional[Tuple] = None, +elements_type: ElementType = VoidType(), optional=False`. Typically, the only place where you need to instantiate +:class:`NeuralType` objects are inside your module's `input_ports` and +`output_ports` properties. + + +Consider an example below. It represents an (audio) data layer output ports, used in Speech recognition collection. .. 
code-block:: python - input_ports = {"x": NeuralType({0: AxisType(BatchTag), - 1: AxisType(ChannelTag), - 2: AxisType(HeightTag, 224), - 3: AxisType(WidthTag, 224)})} - output_ports = {"output": NeuralType({ - 0: AxisType(BatchTag), - 1: AxisType(ChannelTag)})} + { + 'audio_signal': NeuralType(axes=(AxisType(kind=AxisKind.Batch, size=None, is_list=False), + AxisType(kind=AxisKind.Time, size=None, is_list=False)), + elements_type=AudioSignal(freq=self._sample_rate)), + 'a_sig_length': NeuralType(axes=tuple(AxisType(kind=AxisKind.Batch, size=None, is_list=False)), + elements_type=LengthsType()), + 'transcripts': NeuralType(axes=(AxisType(kind=AxisKind.Batch, size=None, is_list=False), + AxisType(kind=AxisKind.Time, size=None, is_list=False)), + elements_type=LabelsType()), + 'transcript_length': NeuralType(axes=tuple(AxisType(kind=AxisKind.Batch, size=None, is_list=False)), + elements_type=LengthsType()), + } + +A less verbose version of exactly the same output ports looks like this: +.. code-block:: python + { + 'audio_signal': NeuralType(('B', 'T'), AudioSignal(freq=self._sample_rate)), + 'a_sig_length': NeuralType(tuple('B'), LengthsType()), + 'transcripts': NeuralType(('B', 'T'), LabelsType()), + 'transcript_length': NeuralType(tuple('B'), LengthsType()), + } -**Neural type comparison** -Two :class:`NeuralType` objects can be compared using ``.compare`` method. -The result is: + +Neural type comparison +~~~~~~~~~~~~~~~~~~~~~~ + +Two :class:`NeuralType` objects are compared using ``.compare`` method. +The result is from the :class:`NeuralTypeComparisonResult`: .. code-block:: python class NeuralTypeComparisonResult(Enum): - """The result of comparing two neural type objects for compatibility. - When comparing A.compare_to(B):""" - SAME = 0 - LESS = 1 # A is B - GREATER = 2 # B is A - DIM_INCOMPATIBLE = 3 # Resize connector might fix incompatibility - TRANSPOSE_SAME = 4 # A transpose will make them same - INCOMPATIBLE = 5 # A and B are incompatible. Can't fix incompatibility automatically + """The result of comparing two neural type objects for compatibility. + When comparing A.compare_to(B):""" + + SAME = 0 + LESS = 1 # A is B + GREATER = 2 # B is A + DIM_INCOMPATIBLE = 3 # Resize connector might fix incompatibility + TRANSPOSE_SAME = 4 # A transpose and/or converting between lists and tensors will make them same + CONTAINER_SIZE_MISMATCH = 5 # A and B contain different number of elements + INCOMPATIBLE = 6 # A and B are incompatible + SAME_TYPE_INCOMPATIBLE_PARAMS = 7 # A and B are of the same type but parametrized differently + + +Special cases +~~~~~~~~~~~~~ + +* **Void** element types. Sometimes, it is necessary to have a functionality similar to "void*" in C/C++. That, is if we still want to enforce order and axes' semantics but should be able to accept elements of any type. This can be achieved by using an instance of :class:`VoidType` as ``elements_type`` argument. +* **Big void** this type will effectively disable any type checks. This is how to create such type: ``NeuralType()``. The result of its comparison to any other type will always be SAME. +* **AxisKind.Any** this axis kind is used to represent any axis. This is useful, for example, in losses where a specific loss module can be used in difference applications and therefore with different axis kinds + +Inheritance +~~~~~~~~~~~ + +Type inheritance is a very powerful tool in programming. NeMo's neural types support inheritance. Consider the +following example below. + +**Example.** We want to represent the following. 
A module's A output (out1) produces mel-spectrogram +signal, while module's B output produces mffc-spectrogram. We also want to a thrid module C which can perform data +augmentation with any kind of spectrogram. With NeMo neural types representing this semantics is easy: +.. code-block:: python + + input = NeuralType(('B', 'D', 'T'), SpectrogramType()) + out1 = NeuralType(('B', 'D', 'T'), MelSpectrogramType()) + out2 = NeuralType(('B', 'D', 'T'), MFCCSpectrogramType()) + + # then the following comparison results will be generated + input.compare(out1) == SAME + input.compare(out2) == SAME + out1.compare(input) == INCOMPATIBLE + out2.compare(out1) == INCOMPATIBLE + +This happens because both ``MelSpectrogramType`` and ``MFCCSpectrogramType`` inherit from ``SpectrogramType`` class. +Notice, that mfcc and mel spectrograms aren't interchangable, which is why ``out1.compare(input) == INCOMPATIBLE`` -**Special cases** +Advanced usage +~~~~~~~~~~~~~~ -* *Non-tensor* objects should be denoted as ``NeuralType(None)`` -* *Optional*: input is as optional, if input is provided the type compatibility will be checked -* *Root* type is denoted by ``NeuralType({})``: A port of ``NeuralType({})`` type must accept NmTensors of any NeuralType: +**Extending with user-defined types.** If you need to add your own element types, create a new class inheriting from +:class:`ElementType`. Instead of using built-in axes kinds from +:class:`AxisKind`, you can define your own +by creating a new Python enum which should inherit from :class:`AxisKindAbstract`. + +**Lists**. Sometimes module's input or output should be a list (possibly nested) of Tensors. NeMo's +:class:`AxisType` class accepts ``is_list`` argument which could be set to True. +Consider the example below: .. code-block:: python - root_type = NeuralType({}) - root_type.compare(any_other_neural_type) == NeuralTypeComparisonResult.SAME + T1 = NeuralType( + axes=( + AxisType(kind=AxisKind.Batch, size=None, is_list=True), + AxisType(kind=AxisKind.Time, size=None, is_list=True), + AxisType(kind=AxisKind.Dimension, size=32, is_list=False), + AxisType(kind=AxisKind.Dimension, size=128, is_list=False), + AxisType(kind=AxisKind.Dimension, size=256, is_list=False), + ), + elements_type=ChannelType(), + ) + +In this example, first two axes are lists. That is the object are list of lists of rank 3 tensors with dimensions +(32x128x256). Note that all list axes must come before any tensor axis. + +.. tip:: + We strongly recommend this to be avoided, if possible, and tensors used instead (perhaps) with padding. -See "nemo/tests/test_neural_types.py" for more examples. + +**Named tuples (structures).** To represent struct-like objects, for example, bounding boxes in computer vision, use +the following syntax: + +.. code-block:: python + + class BoundingBox(ElementType): + def __str__(self): + return "bounding box from detection model" + def fields(self): + return ("X", "Y", "W", "H") + # ALSO ADD new, user-defined, axis kind + class AxisKind2(AxisKindAbstract): + Image = 0 + T1 = NeuralType(elements_type=BoundingBox(), + axes=(AxisType(kind=AxisKind.Batch, size=None, is_list=True), + AxisType(kind=AxisKind2.Image, size=None, is_list=True))) + +In the example above, we create a special "element type" class for BoundingBox which stores exactly 4 values. +We also, add our own axis kind (Image). So the final Neural Type (T1) represents lists (for batch) of lists (for +image) of bounding boxes. Under the hood it should be list(lists(4x1 tensors)). 
**Neural Types help us to debug models** @@ -76,6 +179,5 @@ For example, module should concatenate (add) two input tensors X and Y along dim A module expects image of size 224x224 but gets 256x256. The type comparison will result in ``NeuralTypeComparisonResult.DIM_INCOMPATIBLE`` . -.. note:: - This type mechanism is represented by Python inheritance. That is, :class:`NmTensor` class inherits from :class:`NeuralType` class. + diff --git a/docs/sources/source/tutorials/weightsharing.rst b/docs/sources/source/tutorials/weightsharing.rst index 4b6289421519..fa56ba20a722 100644 --- a/docs/sources/source/tutorials/weightsharing.rst +++ b/docs/sources/source/tutorials/weightsharing.rst @@ -3,8 +3,8 @@ Weight Sharing between Modules There are several ways to share or tie weights between neural modules. -Neural Module reuse -~~~~~~~~~~~~~~~~~~~~~~~~~~ +Neural Module Reuse +~~~~~~~~~~~~~~~~~~~ The idea is to re-use neural modules between training, evaluation and inference graphs. For example: @@ -15,7 +15,7 @@ For example: train_dataloader = nemo.TrainDataLayer(**train_config) eval_dataloader = nemo.EvalDataLayer(**eval_config) - L = nemo.MaskedXEntropyLoss() + L = nemo.tutorials.MaskedXEntropyLoss() # training model @@ -36,18 +36,19 @@ For example: ... -Copy weights between modules -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Weight Copying Between Modules +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ :class:`NeuralModule` class provides 2 methods :meth:`get_weights` and :meth:`set_weights` -for sharing weights. +for copying weights. .. note:: :meth:`set_weights` method can set only part of module's weights. .. important:: This approach is used only to copy weights. Subsequent update of weights in one module will not affect weights in the other module. + This means that the weights will get DIFFERENT gradients on the update step. Consider an example: @@ -72,19 +73,21 @@ Consider an example: tn3.get_weights())) -Tie weights between modules -~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Weight Tying Between Modules +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ :class:`NeuralModule` class provides :meth:`tie_weights_with` method to tie weights between two or more modules. .. important:: - Tied weights are identical across all modules: subsequent modification of weights in one module will result in the same modification on the other. + Tied weights are identical across all modules. Gradients to the weights will be the SAME. +.. important:: + However manually updating the weight on one module via tensor.data will NOT update the weight on the other module In the example below we first create a simple embedding encoder which takes [batch, time] sequences of word ids from vocabulary ``V`` and embeds them into some ``D``-dimensional space. Effectively, this is a lookup-based projection from ``V``-dimensional space to ``D``-dimensional space. We then create a decoder which projects from ``D``-dimensional space back to the ``V``-dimensional space. We want to transpose the encoder projection matrix and reuse it for decoder. The code below demonstrates how this can be achieved. .. note:: - The weights have different names (``embedding.weight`` and ``projection.weight``) but their values are the same. Changes to one will result to changes in the other. Effectively, ``embedding.weight`` and ``projection.weight`` become pointers to the same tensor. + The weights have different names (``embedding.weight`` and ``projection.weight``) but their values and gradient updates will be the same. .. 
code-block:: python @@ -102,14 +105,6 @@ The code below demonstrates how this can be achieved. self.assertTrue(np.array_equal(embd.embedding.weight.detach().numpy(), proj.projection.weight.detach().numpy())) - was = embd.embedding.weight.detach().numpy() - - # Now, change weights on one object - embd.embedding.weight.data = torch.tensor(np.random.randint(0, 10, (3, 2))*1.0) - after = embd.embedding.weight.detach().numpy() - - # Make sure that the change was reflected on another object - self.assertTrue(np.array_equal(embd.embedding.weight.detach().numpy(), - proj.projection.weight.detach().numpy())) - self.assertFalse(np.array_equal(was, after)) - +.. warning:: + Manually setting the weight tensors to be equal to the other will likely break multi-GPU and multi-node runs. Eg, + ``embd.embedding.weight = proj.projection.weights`` is not recommended. Use the ``tie_weights_with()`` function instead diff --git a/docs/sources/update_docs_docker.sh b/docs/sources/update_docs_docker.sh new file mode 100755 index 000000000000..e5d1a4ec3e0a --- /dev/null +++ b/docs/sources/update_docs_docker.sh @@ -0,0 +1,2 @@ +cd ../../ +docker run --rm -v $PWD:/workspace python:3.7 /bin/bash -c "cd /workspace && pip install -r requirements/requirements_docs.txt && cd docs/sources/ && bash update_docs.sh" diff --git a/examples/applications/asr_service/app/__init__.py b/examples/applications/asr_service/app/__init__.py index a31e50d7ef94..f5da84fa3f61 100644 --- a/examples/applications/asr_service/app/__init__.py +++ b/examples/applications/asr_service/app/__init__.py @@ -7,8 +7,7 @@ import nemo import nemo.collections.asr as nemo_asr - -logging = nemo.logging +from nemo.utils import logging app = Flask(__name__) # make sure WORK_DIR exists before calling your service diff --git a/examples/applications/asr_service/app/routes.py b/examples/applications/asr_service/app/routes.py index 35e543173ac0..7bd636a9b39f 100644 --- a/examples/applications/asr_service/app/routes.py +++ b/examples/applications/asr_service/app/routes.py @@ -17,10 +17,8 @@ from flask import request from werkzeug.utils import secure_filename -import nemo import nemo.collections.asr as nemo_asr - -logging = nemo.logging +from nemo.utils import logging try: from app import beam_search_with_lm @@ -86,7 +84,7 @@ def transcribe_file(): greedy = True if request.form.get('beam'): if not ENABLE_NGRAM: - return "Error: Beam Search with ngram LM is not enabled " "on this server" + return "Error: Beam Search with ngram LM is not enabled on this server" greedy = False file_path = os.path.join(WORK_DIR, secure_filename(f.filename)) f.save(file_path) diff --git a/examples/asr/QuartzNetModel.ipynb b/examples/asr/QuartzNetModel.ipynb new file mode 100644 index 000000000000..cef071afa955 --- /dev/null +++ b/examples/asr/QuartzNetModel.ipynb @@ -0,0 +1,285 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# NeMo Models" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Import NeMo and ASR collection\n", + "import nemo\n", + "import nemo.collections.asr as nemo_asr\n", + "try:\n", + " nf = nemo.core.NeuralModuleFactory()\n", + "except:\n", + " print(\"GPU was not detected. 
Running on CPU\")\n", + " nf = nemo.core.NeuralModuleFactory(placement=nemo.core.DeviceType.CPU) " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## NeMoModel instantiation without pre-trained weights" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A *NeMoModel* is a kind of NeuralModule which contains other neural modules inside it.\n", + "NeMoModel can have other NeuralModules inside and their mode, and topology of connections can\n", + "depend on the mode in which NeMo model is used (training or evaluation)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Because NeMoModel is a NeuralModule, regular constructor-based initialization applies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#First, load the config from YAML file\n", + "from ruamel.yaml import YAML\n", + "yaml = YAML(typ=\"safe\")\n", + "with open(\"../configs/jasper_an4.yaml\") as file:\n", + " model_definition = yaml.load(file)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "quartznet_model1 = nemo.collections.asr.models.QuartzNet(\n", + " preprocessor_params=model_definition['AudioToMelSpectrogramPreprocessor'],\n", + " encoder_params=model_definition['JasperEncoder'],\n", + " decoder_params=model_definition['JasperDecoderForCTC'])\n", + "print(f\"Created QuartzNet model with {quartznet_model1.num_weights} weights\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Because NeMoModel is a NeuralModule, regular config import/export work" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "quartznet_model1.export_to_config(\"qn1.yaml\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "quartznet_model2 = nemo.collections.asr.models.QuartzNet.import_from_config(config_file=\"qn1.yaml\")\n", + "print(f\"Created QuartzNet model with {quartznet_model2.num_weights} weights\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## NeMoModel instantiation with pre-trained weights" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# List all available models from NGC\n", + "for checkpoint in nemo.collections.asr.models.ASRConvCTCModel.list_pretrained_models():\n", + " print(checkpoint.pretrained_model_name)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Automagically go to NGC and instantiate a model and weights\n", + "quartznet_model3 = nemo_asr.models.QuartzNet.from_pretrained(model_info=\"QuartzNet15x5-En\")\n", + "print(f\"Created QuartzNet model with {quartznet_model3.num_weights} weights\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Export model to \".nemo\" format" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Export to \".nemo\" file - all params, structure and weights" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "quartznet_model3.save_to('quartznet.nemo')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "quartznet_model4 = 
nemo_asr.models.QuartzNet.from_pretrained(model_info='quartznet.nemo')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# \".nemo\" file is just an arxiv with all of the model's details and weights\n", + "! mv quartznet.nemo quartznet.tar.gz\n", + "! tar -xvf quartznet.tar.gz" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Export to \".nemo\" file - for deployment with NVIDIA Jarvis" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "quartznet_model3.save_to('quartznet_for_Jarvis.nemo', optimize_for_deployment=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# \".nemo\" file optimized for deployment will only contain eval structure and .onnx files\n", + "! mv quartznet_for_Jarvis.nemo quartznet_for_Jarvis.nemo.tar.gz\n", + "! tar -xvf quartznet_for_Jarvis.nemo.tar.gz" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## NeMoModels can be used just as any other Neural Module" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Change these to point to your training data\n", + "train_manifest = \"/Users/okuchaiev/Data/an4_dataset/an4_train.json\"\n", + "val_manifest = \"/Users/okuchaiev/Data/an4_dataset/an4_val.json\"\n", + "labels = model_definition['labels']\n", + "data_layer = nemo_asr.AudioToTextDataLayer(manifest_filepath=train_manifest, labels=labels, batch_size=16)\n", + "data_layerE = nemo_asr.AudioToTextDataLayer(manifest_filepath=val_manifest, labels=labels, batch_size=16)\n", + "ctc_loss = nemo_asr.CTCLossNM(num_classes=len(labels))\n", + "greedy_decoder = nemo_asr.GreedyCTCDecoder()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "audio_signal, audio_signal_len, transcript, transcript_len = data_layer()\n", + "log_probs, encoded_len = quartznet_model4(input_signal=audio_signal, length=audio_signal_len)\n", + "predictions = greedy_decoder(log_probs=log_probs)\n", + "loss = ctc_loss(log_probs=log_probs, targets=transcript,\n", + " input_length=encoded_len, target_length=transcript_len)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# START TRAINING \n", + "tensors_to_evaluate=[predictions, transcript, transcript_len]\n", + "from functools import partial\n", + "from nemo.collections.asr.helpers import monitor_asr_train_progress\n", + "train_callback = nemo.core.SimpleLossLoggerCallback(\n", + " tensors=[loss]+tensors_to_evaluate,\n", + " print_func=partial(monitor_asr_train_progress, labels=labels))\n", + "nf.train(tensors_to_optimize=[loss],\n", + " callbacks=[train_callback],\n", + " optimizer=\"novograd\",\n", + " optimization_params={\"num_epochs\": 30, \"lr\": 1e-2,\n", + " \"weight_decay\": 1e-3})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} 
diff --git a/examples/asr/configs/jasper10x5.yaml b/examples/asr/configs/jasper10x5.yaml index 4b9d80a8fa76..c7c8429be6d7 100644 --- a/examples/asr/configs/jasper10x5.yaml +++ b/examples/asr/configs/jasper10x5.yaml @@ -1,137 +1,125 @@ -model: "Jasper" -sample_rate: 16000 - -AudioToTextDataLayer: - max_duration: 16.7 - trim_silence: true - normalize_transcripts: true - - train: - shuffle: true - - eval: - shuffle: false - max_duration: null - -AudioToMelSpectrogramPreprocessor: - window_size: 0.02 - window_stride: 0.01 - window: "hann" - normalize: "per_feature" - n_fft: 512 - features: 64 - dither: 0.00001 - pad_to: 16 - stft_conv: true - -JasperEncoder: - activation: "relu" - conv_mask: true - - jasper: - - filters: 256 - repeat: 1 - kernel: [11] - stride: [2] - dilation: [1] - dropout: 0.2 - residual: false - - - filters: 256 - repeat: 5 - kernel: [11] - stride: [1] - dilation: [1] - dropout: 0.2 - residual: true - - - filters: 256 - repeat: 5 - kernel: [11] - stride: [1] - dilation: [1] - dropout: 0.2 - residual: true - - - filters: 384 - repeat: 5 - kernel: [13] - stride: [1] - dilation: [1] - dropout: 0.2 - residual: true - - - filters: 384 - repeat: 5 - kernel: [13] - stride: [1] - dilation: [1] - dropout: 0.2 - residual: true - - - filters: 512 - repeat: 5 - kernel: [17] - stride: [1] - dilation: [1] - dropout: 0.2 - residual: true - - - filters: 512 - repeat: 5 - kernel: [17] - stride: [1] - dilation: [1] - dropout: 0.2 - residual: true - - - filters: 640 - repeat: 5 - kernel: [21] - stride: [1] - dilation: [1] - dropout: 0.3 - residual: true - - - filters: 640 - repeat: 5 - kernel: [21] - stride: [1] - dilation: [1] - dropout: 0.3 - residual: true - - - filters: 768 - repeat: 5 - kernel: [25] - stride: [1] - dilation: [1] - dropout: 0.3 - residual: true - - - filters: 768 - repeat: 5 - kernel: [25] - stride: [1] - dilation: [1] - dropout: 0.3 - residual: true - - - filters: 896 - repeat: 1 - kernel: [29] - stride: [1] - dilation: [2] - dropout: 0.4 - residual: false - - - filters: 1024 - repeat: 1 - kernel: [1] - stride: [1] - dilation: [1] - dropout: 0.4 - residual: false - -labels: [" ", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", - "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "'"] +header: {collection_type: asr, collection_version: null, full_spec: nemo.collections.asr.models.asrconvctcmodel.JasperNet, + nemo_core_version: 0.11.0b0} +init_params: + decoder_params: + header: {full_spec: nemo.collections.asr.JasperDecoderForCTC} + init_params: + feat_in: 1024 + num_classes: 28 + vocabulary: [' ', a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, + u, v, w, x, y, z, ''''] + encoder_params: + header: {full_spec: nemo.collections.asr.JasperEncoder} + init_params: + activation: relu + conv_mask: true + feat_in: 64 + jasper: + - dilation: [1] + dropout: 0.2 + filters: 256 + kernel: [11] + repeat: 1 + residual: false + stride: [2] + - dilation: [1] + dropout: 0.2 + filters: 256 + kernel: [11] + repeat: 5 + residual: true + residual_dense: true + stride: [1] + - dilation: [1] + dropout: 0.2 + filters: 256 + kernel: [11] + repeat: 5 + residual: true + residual_dense: true + stride: [1] + - dilation: [1] + dropout: 0.2 + filters: 384 + kernel: [13] + repeat: 5 + residual: true + residual_dense: true + stride: [1] + - dilation: [1] + dropout: 0.2 + filters: 384 + kernel: [13] + repeat: 5 + residual: true + residual_dense: true + stride: [1] + - dilation: [1] + dropout: 0.2 + filters: 512 + kernel: [17] + repeat: 5 + residual: true + 
residual_dense: true + stride: [1] + - dilation: [1] + dropout: 0.2 + filters: 512 + kernel: [17] + repeat: 5 + residual: true + residual_dense: true + stride: [1] + - dilation: [1] + dropout: 0.3 + filters: 640 + kernel: [21] + repeat: 5 + residual: true + residual_dense: true + stride: [1] + - dilation: [1] + dropout: 0.3 + filters: 640 + kernel: [21] + repeat: 5 + residual: true + residual_dense: true + stride: [1] + - dilation: [1] + dropout: 0.3 + filters: 768 + kernel: [25] + repeat: 5 + residual: true + residual_dense: true + stride: [1] + - dilation: [1] + dropout: 0.3 + filters: 768 + kernel: [25] + repeat: 5 + residual: true + residual_dense: true + stride: [1] + - dilation: [2] + dropout: 0.4 + filters: 896 + kernel: [29] + repeat: 1 + residual: false + stride: [1] + - dilation: [1] + dropout: 0.4 + filters: 1024 + kernel: [1] + repeat: 1 + residual: false + stride: [1] + preprocessor_params: + header: {full_spec: nemo.collections.asr.AudioToMelSpectrogramPreprocessor} + init_params: {dither: 1e-05, features: 64, n_fft: 512, normalize: per_feature, + pad_to: 16, stft_conv: true, window: hann, window_size: 0.02, window_stride: 0.01} + spec_augment_params: + header: {full_spec: nemo.collections.asr.SpectrogramAugmentation} + init_params: {rect_freq: 50, rect_masks: 5, rect_time: 120} diff --git a/examples/asr/configs/jasper10x5dr.yaml b/examples/asr/configs/jasper10x5dr.yaml deleted file mode 100644 index c5052f8c8b36..000000000000 --- a/examples/asr/configs/jasper10x5dr.yaml +++ /dev/null @@ -1,147 +0,0 @@ -model: "Jasper" -sample_rate: 16000 - -AudioToTextDataLayer: - max_duration: 16.7 - trim_silence: true - normalize_transcripts: true - - train: - shuffle: true - - eval: - shuffle: false - max_duration: null - -AudioToMelSpectrogramPreprocessor: - window_size: 0.02 - window_stride: 0.01 - window: "hann" - normalize: "per_feature" - n_fft: 512 - features: 64 - dither: 0.00001 - pad_to: 16 - stft_conv: true - -JasperEncoder: - activation: "relu" - conv_mask: true - - jasper: - - filters: 256 - repeat: 1 - kernel: [11] - stride: [2] - dilation: [1] - dropout: 0.2 - residual: false - - - filters: 256 - repeat: 5 - kernel: [11] - stride: [1] - dilation: [1] - dropout: 0.2 - residual: true - residual_dense: true - - - filters: 256 - repeat: 5 - kernel: [11] - stride: [1] - dilation: [1] - dropout: 0.2 - residual: true - residual_dense: true - - - filters: 384 - repeat: 5 - kernel: [13] - stride: [1] - dilation: [1] - dropout: 0.2 - residual: true - residual_dense: true - - - filters: 384 - repeat: 5 - kernel: [13] - stride: [1] - dilation: [1] - dropout: 0.2 - residual: true - residual_dense: true - - - filters: 512 - repeat: 5 - kernel: [17] - stride: [1] - dilation: [1] - dropout: 0.2 - residual: true - residual_dense: true - - - filters: 512 - repeat: 5 - kernel: [17] - stride: [1] - dilation: [1] - dropout: 0.2 - residual: true - residual_dense: true - - - filters: 640 - repeat: 5 - kernel: [21] - stride: [1] - dilation: [1] - dropout: 0.3 - residual: true - residual_dense: true - - - filters: 640 - repeat: 5 - kernel: [21] - stride: [1] - dilation: [1] - dropout: 0.3 - residual: true - residual_dense: true - - - filters: 768 - repeat: 5 - kernel: [25] - stride: [1] - dilation: [1] - dropout: 0.3 - residual: true - residual_dense: true - - - filters: 768 - repeat: 5 - kernel: [25] - stride: [1] - dilation: [1] - dropout: 0.3 - residual: true - residual_dense: true - - - filters: 896 - repeat: 1 - kernel: [29] - stride: [1] - dilation: [2] - dropout: 0.4 - residual: false - - - 
filters: 1024 - repeat: 1 - kernel: [1] - stride: [1] - dilation: [1] - dropout: 0.4 - residual: false - -labels: [" ", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", - "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "'"] diff --git a/examples/asr/configs/jasper_an4.yaml b/examples/asr/configs/jasper_an4.yaml index cd07bf82c951..1273036a1bee 100644 --- a/examples/asr/configs/jasper_an4.yaml +++ b/examples/asr/configs/jasper_an4.yaml @@ -3,87 +3,112 @@ sample_rate: &sample_rate 16000 dropout: &drop 0.0 repeat: &rep 1 -AudioToTextDataLayer: - sample_rate: *sample_rate - train: +labels: &labels [" ", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", + "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "'"] + + +AudioToTextDataLayer_train: + header: + full_spec: nemo.collections.asr.AudioToTextDataLayer + init_params: + sample_rate: *sample_rate + labels: *labels + batch_size: 48 shuffle: true - eval: + + +AudioToTextDataLayer_eval: + header: + full_spec: nemo.collections.asr.AudioToTextDataLayer + init_params: + sample_rate: *sample_rate + labels: *labels + batch_size: 64 shuffle: false + AudioToMelSpectrogramPreprocessor: - normalize: "per_feature" - window_size: 0.02 - window_stride: 0.01 - window: "hann" - features: &n_mels 64 - n_fft: 512 - frame_splicing: 1 - dither: 0.00001 - stft_conv: true + header: + full_spec: nemo.collections.asr.AudioToMelSpectrogramPreprocessor + init_params: + normalize: "per_feature" + window_size: 0.02 + sample_rate: *sample_rate + window_stride: 0.01 + window: "hann" + features: &n_mels 64 + n_fft: 512 + frame_splicing: 1 + dither: 0.00001 + stft_conv: true JasperEncoder: - feat_in: *n_mels - activation: "relu" - - jasper: - - filters: 128 - repeat: 1 - kernel: [11] - stride: [1] - dilation: [1] - dropout: *drop - residual: true - - - filters: 256 - repeat: *rep - kernel: [13] - stride: [1] - dilation: [1] - dropout: *drop - residual: true - - - filters: 256 - repeat: *rep - kernel: [15] - stride: [1] - dilation: [1] - dropout: *drop - residual: true - - - filters: 256 - repeat: *rep - kernel: [17] - stride: [1] - dilation: [1] - dropout: *drop - residual: true - - - filters: 256 - repeat: *rep - kernel: [19] - stride: [1] - dilation: [1] - dropout: *drop - residual: true - - - filters: 256 - repeat: 1 - kernel: [21] - stride: [1] - dilation: [1] - dropout: 0.0 - residual: false - - - filters: &enc_feat_out 1024 - repeat: 1 - kernel: [1] - stride: [1] - dilation: [1] - dropout: 0.0 - residual: false + header: + full_spec: nemo.collections.asr.JasperEncoder + init_params: + feat_in: *n_mels + activation: "relu" -JasperDecoderForCTC: - feat_in: *enc_feat_out + jasper: + - filters: 128 + repeat: 1 + kernel: [11] + stride: [1] + dilation: [1] + dropout: *drop + residual: true -labels: [" ", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", - "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "'"] + - filters: 256 + repeat: *rep + kernel: [13] + stride: [1] + dilation: [1] + dropout: *drop + residual: true + + - filters: 256 + repeat: *rep + kernel: [15] + stride: [1] + dilation: [1] + dropout: *drop + residual: true + + - filters: 256 + repeat: *rep + kernel: [17] + stride: [1] + dilation: [1] + dropout: *drop + residual: true + + - filters: 256 + repeat: *rep + kernel: [19] + stride: [1] + dilation: [1] + dropout: *drop + residual: true + + - filters: 256 + repeat: 1 + kernel: [21] + stride: [1] + dilation: [1] + dropout: 0.0 + residual: false + + - filters: 
&enc_feat_out 1024 + repeat: 1 + kernel: [1] + stride: [1] + dilation: [1] + dropout: 0.0 + residual: false + +JasperDecoderForCTC: + header: + full_spec: nemo.collections.asr.JasperDecoderForCTC + init_params: + feat_in: *enc_feat_out + num_classes: 28 diff --git a/examples/asr/configs/quartznet15x5-zh.yaml b/examples/asr/configs/quartznet15x5-zh.yaml new file mode 100644 index 000000000000..16249b28144e --- /dev/null +++ b/examples/asr/configs/quartznet15x5-zh.yaml @@ -0,0 +1,374 @@ +header: {collection_type: asr, collection_version: null, full_spec: nemo.collections.asr.models.asrconvctcmodel.QuartzNet, + nemo_core_version: 0.11.0b0} +init_params: + decoder_params: + header: {full_spec: nemo.collections.asr.JasperDecoderForCTC} + init_params: + feat_in: 1024 + num_classes: 5206 + vocabulary: [' ', '''', A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, + S, T, U, V, W, X, Y, Z, 㶧, 䶮, 一, 丁, 七, 万, 丈, 三, 上, 下, 不, 与, 丐, 丑, 专, 且, 丕, + 世, 丘, 丙, 业, 丛, 东, 丝, 丞, 丢, 两, 严, 丧, 个, 丫, 中, 丰, 串, 临, 丸, 丹, 为, 主, 丽, 举, 乃, + 久, 么, 义, 之, 乌, 乍, 乎, 乏, 乐, 乒, 乓, 乔, 乖, 乘, 乙, 九, 乞, 也, 习, 乡, 书, 买, 乱, 乳, 乾, + 了, 予, 争, 事, 二, 于, 亏, 云, 互, 五, 井, 亘, 亚, 些, 亟, 亡, 亢, 交, 亥, 亦, 产, 亨, 亩, 享, 京, + 亭, 亮, 亲, 亳, 亵, 人, 亿, 什, 仁, 仄, 仅, 仆, 仇, 今, 介, 仍, 从, 仑, 仓, 仔, 仕, 他, 仗, 付, 仙, + 仞, 仟, 仡, 代, 令, 以, 仨, 仪, 们, 仰, 仲, 件, 价, 任, 份, 仿, 企, 伉, 伊, 伍, 伎, 伏, 伐, 休, 众, + 优, 伙, 会, 伞, 伟, 传, 伢, 伤, 伦, 伪, 伫, 伯, 估, 伴, 伶, 伸, 伺, 似, 伽, 佃, 但, 位, 低, 住, 佐, + 佑, 体, 何, 佗, 佘, 余, 佚, 佛, 作, 佝, 佟, 你, 佣, 佩, 佬, 佯, 佰, 佳, 佶, 佻, 佼, 使, 侃, 侄, 侈, + 例, 侍, 侏, 侑, 侗, 供, 依, 侠, 侣, 侥, 侦, 侧, 侨, 侬, 侮, 侯, 侵, 便, 促, 俄, 俊, 俎, 俏, 俐, 俑, + 俗, 俘, 俚, 保, 俞, 俟, 信, 俨, 俩, 俪, 俭, 修, 俯, 俱, 俸, 俺, 俾, 倌, 倍, 倒, 倔, 倘, 候, 倚, 倜, + 借, 倡, 倦, 倩, 倪, 倭, 债, 值, 倾, 偃, 假, 偈, 偌, 偎, 偏, 偓, 偕, 做, 停, 健, 偶, 偷, 偻, 偿, 傀, + 傅, 傍, 傣, 傥, 储, 催, 傲, 傻, 像, 僚, 僧, 僮, 僵, 僻, 儋, 儒, 儡, 儿, 兀, 允, 元, 兄, 充, 兆, 先, + 光, 克, 免, 兑, 兔, 兖, 党, 兜, 兢, 入, 全, 八, 公, 六, 兮, 兰, 共, 关, 兴, 兵, 其, 具, 典, 兹, 养, + 兼, 兽, 冀, 内, 冈, 冉, 册, 再, 冒, 冕, 冗, 写, 军, 农, 冠, 冢, 冤, 冥, 冬, 冯, 冰, 冲, 决, 况, 冶, + 冷, 冻, 冼, 冽, 净, 凄, 准, 凇, 凉, 凋, 凌, 减, 凑, 凛, 凝, 几, 凡, 凤, 凭, 凯, 凰, 凳, 凶, 凸, 凹, + 出, 击, 函, 凿, 刀, 刁, 刃, 分, 切, 刊, 刍, 刎, 刑, 划, 列, 刘, 则, 刚, 创, 初, 删, 判, 刨, 利, 别, + 刮, 到, 制, 刷, 券, 刹, 刺, 刻, 刽, 剁, 剂, 剃, 削, 剌, 前, 剐, 剑, 剔, 剖, 剜, 剥, 剧, 剩, 剪, 副, + 割, 剽, 剿, 劈, 力, 劝, 办, 功, 加, 务, 劣, 动, 助, 努, 劫, 劭, 励, 劲, 劳, 劵, 劾, 势, 勃, 勇, 勉, + 勋, 勐, 勒, 勘, 募, 勤, 勺, 勾, 勿, 匀, 包, 匆, 匈, 匏, 匕, 化, 北, 匙, 匝, 匠, 匡, 匣, 匪, 匮, 匹, + 区, 医, 匾, 匿, 十, 千, 升, 午, 卉, 半, 华, 协, 卑, 卒, 卓, 单, 卖, 南, 博, 卜, 卞, 占, 卡, 卢, 卤, + 卦, 卧, 卫, 卯, 印, 危, 卲, 即, 却, 卵, 卷, 卸, 卿, 厂, 厄, 厅, 历, 厉, 压, 厌, 厕, 厘, 厚, 厝, 原, + 厢, 厥, 厦, 厨, 厩, 厮, 去, 县, 叁, 参, 又, 叉, 及, 友, 双, 反, 发, 叔, 取, 受, 变, 叙, 叛, 叠, 口, + 古, 句, 另, 叨, 叩, 只, 叫, 召, 叭, 叮, 可, 台, 叱, 史, 右, 叵, 叶, 号, 司, 叹, 叼, 叽, 吁, 吃, 各, + 吆, 合, 吉, 吊, 吋, 同, 名, 后, 吏, 吐, 向, 吒, 吓, 吕, 吖, 吗, 君, 吝, 吞, 吟, 吠, 否, 吧, 吨, 吩, + 含, 听, 吭, 吮, 启, 吱, 吴, 吵, 吸, 吹, 吻, 吼, 吾, 呀, 呃, 呆, 呈, 告, 呐, 呕, 呗, 员, 呛, 呜, 呢, + 呦, 周, 呱, 呲, 味, 呵, 呷, 呸, 呻, 呼, 命, 咀, 咂, 咄, 咆, 咋, 和, 咎, 咏, 咐, 咒, 咔, 咕, 咖, 咘, + 咙, 咚, 咝, 咣, 咤, 咦, 咧, 咨, 咩, 咪, 咫, 咬, 咭, 咯, 咱, 咳, 咸, 咻, 咽, 哀, 品, 哂, 哄, 哆, 哇, + 哈, 哉, 响, 哎, 哐, 哑, 哒, 哔, 哕, 哗, 哟, 哥, 哦, 哨, 哩, 哪, 哭, 哮, 哲, 哺, 哼, 哽, 唁, 唆, 唇, + 唉, 唏, 唐, 唑, 唛, 唠, 唢, 唤, 唧, 唬, 售, 唯, 唰, 唱, 唳, 唷, 唾, 啃, 啄, 商, 啊, 啕, 啖, 啜, 啡, + 啤, 啥, 啦, 啧, 啪, 啬, 啰, 啲, 啵, 啶, 啸, 啼, 啾, 喀, 喁, 喂, 喃, 善, 喆, 喇, 喉, 喊, 喋, 喔, 喘, + 喜, 喝, 喟, 喧, 喱, 喳, 喵, 喷, 喻, 喽, 嗄, 嗅, 嗑, 嗒, 嗓, 嗔, 嗖, 嗜, 嗝, 嗡, 嗣, 嗤, 嗦, 嗨, 嗪, + 嗫, 嗬, 嗯, 嗲, 嗷, 嗽, 嘀, 嘈, 嘉, 嘎, 嘏, 嘘, 嘛, 嘞, 嘟, 嘣, 嘭, 嘱, 嘲, 嘴, 嘶, 嘹, 嘻, 嘿, 噌, + 噎, 噗, 噘, 噙, 噜, 噢, 噤, 器, 噩, 噪, 噬, 噱, 噶, 噻, 噼, 嚎, 嚏, 嚓, 嚣, 嚷, 嚼, 囊, 囍, 囔, 囗, + 囚, 四, 回, 因, 团, 囤, 囧, 囫, 园, 囯, 困, 囱, 围, 囵, 囹, 固, 国, 图, 圃, 圄, 圆, 圈, 土, 圣, 在, + 圩, 圪, 圭, 地, 圳, 圹, 场, 圻, 圾, 址, 坂, 均, 坊, 坍, 坎, 坏, 坐, 坑, 块, 坚, 坛, 
坝, 坞, 坟, 坠, + 坡, 坤, 坦, 坨, 坩, 坪, 坭, 坯, 坳, 坷, 坻, 垂, 垃, 垄, 垅, 型, 垌, 垒, 垚, 垛, 垡, 垢, 垣, 垤, 垦, + 垩, 垫, 垭, 垮, 埂, 埃, 埇, 埋, 城, 埔, 埕, 埚, 埝, 域, 埠, 埭, 埸, 培, 基, 堀, 堂, 堃, 堆, 堇, 堕, + 堡, 堤, 堪, 堰, 堵, 堺, 塌, 塍, 塑, 塔, 塘, 塞, 填, 塬, 塾, 境, 墅, 墉, 墓, 増, 墙, 增, 墟, 墨, 墩, + 壁, 壑, 壕, 壤, 士, 壬, 壮, 声, 壳, 壶, 壹, 处, 备, 复, 夏, 夔, 夕, 外, 夙, 多, 夜, 够, 大, 天, 太, + 夫, 夭, 央, 夯, 失, 头, 夷, 夸, 夹, 夺, 奁, 奂, 奄, 奇, 奈, 奉, 奋, 奎, 奏, 契, 奔, 奕, 奖, 套, 奘, + 奚, 奠, 奢, 奥, 女, 奴, 奶, 奸, 她, 好, 如, 妃, 妄, 妆, 妇, 妈, 妊, 妍, 妒, 妓, 妖, 妙, 妞, 妤, 妥, + 妨, 妩, 妪, 妫, 妮, 妯, 妲, 妹, 妻, 妾, 姆, 姊, 始, 姐, 姑, 姓, 委, 姗, 姚, 姜, 姝, 姣, 姥, 姨, 姬, + 姻, 姿, 威, 娃, 娄, 娅, 娆, 娇, 娈, 娉, 娌, 娓, 娘, 娜, 娟, 娠, 娣, 娥, 娩, 娱, 娲, 娴, 娶, 娼, 婀, + 婆, 婉, 婊, 婕, 婚, 婢, 婧, 婪, 婴, 婵, 婶, 婷, 婺, 婿, 媒, 媚, 媛, 媞, 媲, 媳, 嫁, 嫂, 嫉, 嫌, 嫒, + 嫔, 嫖, 嫚, 嫡, 嫣, 嫦, 嫩, 嫫, 嬅, 嬉, 嬗, 嬛, 嬴, 嬷, 孀, 子, 孑, 孔, 孕, 字, 存, 孙, 孚, 孛, 孜, + 孝, 孟, 孢, 季, 孤, 学, 孩, 孪, 孬, 孰, 孱, 孳, 孵, 孺, 孽, 宁, 它, 宅, 宇, 守, 安, 宋, 完, 宏, 宓, + 宕, 宗, 官, 宙, 定, 宛, 宜, 宝, 实, 宠, 审, 客, 宣, 室, 宥, 宦, 宪, 宫, 宰, 害, 宴, 宵, 家, 宸, 容, + 宽, 宾, 宿, 寂, 寄, 寅, 密, 寇, 富, 寐, 寒, 寓, 寝, 寞, 察, 寡, 寥, 寨, 寮, 寰, 寸, 对, 寺, 寻, 导, + 寿, 封, 射, 尅, 将, 尉, 尊, 小, 少, 尔, 尕, 尖, 尘, 尚, 尝, 尤, 尧, 尬, 就, 尴, 尸, 尹, 尺, 尼, 尽, + 尾, 尿, 局, 屁, 层, 居, 屈, 屉, 届, 屋, 屌, 屎, 屏, 屐, 屑, 展, 属, 屠, 屡, 履, 屯, 山, 屹, 屿, 岁, + 岂, 岌, 岐, 岑, 岔, 岖, 岗, 岚, 岛, 岩, 岬, 岭, 岱, 岳, 岷, 岸, 峁, 峋, 峒, 峙, 峡, 峥, 峦, 峨, 峪, + 峭, 峰, 峻, 崂, 崃, 崆, 崇, 崎, 崔, 崖, 崛, 崧, 崩, 崭, 崮, 崴, 崽, 嵇, 嵊, 嵋, 嵌, 嵘, 嵛, 嵩, 嵬, + 嶂, 嶙, 嶝, 巅, 巍, 川, 州, 巡, 巢, 工, 左, 巧, 巨, 巩, 巫, 差, 己, 已, 巳, 巴, 巷, 巾, 币, 市, 布, + 帅, 帆, 师, 希, 帐, 帕, 帖, 帘, 帚, 帛, 帜, 帝, 带, 帧, 席, 帮, 帷, 常, 帼, 帽, 幂, 幄, 幅, 幌, 幔, + 幕, 幡, 幢, 干, 平, 年, 并, 幸, 幺, 幻, 幼, 幽, 广, 庄, 庆, 庇, 床, 序, 庐, 库, 应, 底, 庖, 店, 庙, + 庚, 府, 庞, 废, 度, 座, 庭, 庵, 庶, 康, 庸, 庹, 庾, 廉, 廊, 廓, 廖, 延, 廷, 建, 开, 异, 弃, 弄, 弈, + 弊, 弋, 式, 弑, 弓, 引, 弗, 弘, 弛, 弟, 张, 弥, 弦, 弧, 弩, 弭, 弯, 弱, 弹, 强, 弼, 归, 当, 录, 彗, + 彝, 形, 彤, 彦, 彩, 彪, 彬, 彭, 彰, 影, 彷, 役, 彻, 彼, 往, 征, 径, 待, 徇, 很, 徉, 徊, 律, 徐, 徒, + 得, 徘, 徙, 徜, 御, 徨, 循, 微, 德, 徽, 心, 必, 忆, 忌, 忍, 忏, 忐, 忑, 忒, 忖, 志, 忘, 忙, 忠, 忡, + 忤, 忧, 忪, 快, 忱, 念, 忻, 忽, 忿, 怀, 态, 怂, 怄, 怅, 怆, 怎, 怒, 怕, 怖, 怜, 思, 怠, 怡, 急, 怦, + 性, 怨, 怪, 怫, 怯, 怵, 总, 怼, 怿, 恁, 恃, 恋, 恍, 恐, 恒, 恕, 恙, 恢, 恣, 恤, 恨, 恩, 恪, 恬, 恭, + 息, 恰, 恳, 恶, 恸, 恺, 恻, 恼, 恿, 悄, 悉, 悌, 悍, 悔, 悖, 悚, 悟, 悠, 患, 悦, 您, 悬, 悭, 悯, 悱, + 悲, 悴, 悸, 悻, 悼, 情, 惆, 惊, 惋, 惑, 惕, 惚, 惜, 惟, 惠, 惦, 惧, 惨, 惩, 惫, 惬, 惭, 惮, 惯, 惰, + 想, 惶, 惹, 惺, 愁, 愈, 愉, 意, 愕, 愚, 感, 愣, 愤, 愧, 愫, 愿, 慈, 慌, 慎, 慑, 慕, 慢, 慧, 慨, 慰, + 慵, 慷, 憋, 憎, 憔, 憧, 憨, 憩, 憬, 憷, 憾, 懂, 懈, 懊, 懋, 懑, 懒, 懦, 懵, 懿, 戈, 戊, 戌, 戍, 戎, + 戏, 成, 我, 戒, 或, 戗, 战, 戚, 戛, 戟, 截, 戬, 戮, 戳, 戴, 户, 戾, 房, 所, 扁, 扇, 扈, 扉, 手, 才, + 扎, 扑, 扒, 打, 扔, 托, 扛, 扞, 扣, 扦, 执, 扩, 扪, 扫, 扬, 扭, 扮, 扯, 扰, 扳, 扶, 批, 扼, 找, 承, + 技, 抄, 抉, 把, 抑, 抒, 抓, 投, 抖, 抗, 折, 抚, 抛, 抠, 抡, 抢, 护, 报, 抨, 披, 抬, 抱, 抵, 抹, 押, + 抽, 抿, 拂, 拄, 担, 拆, 拇, 拈, 拉, 拌, 拍, 拎, 拐, 拒, 拓, 拔, 拖, 拗, 拘, 拙, 拚, 招, 拜, 拟, 拢, + 拣, 拥, 拦, 拧, 拨, 择, 括, 拭, 拮, 拯, 拱, 拳, 拴, 拷, 拼, 拽, 拾, 拿, 持, 挂, 指, 按, 挎, 挑, 挖, + 挚, 挛, 挝, 挞, 挟, 挠, 挡, 挣, 挤, 挥, 挨, 挪, 挫, 振, 挺, 挽, 捂, 捅, 捆, 捉, 捋, 捌, 捍, 捎, 捏, + 捐, 捕, 捞, 损, 捡, 换, 捣, 捧, 据, 捶, 捷, 捺, 捻, 掀, 掂, 掇, 授, 掉, 掌, 掏, 掐, 排, 掖, 掘, 掠, + 探, 掣, 接, 控, 推, 掩, 措, 掬, 掮, 掰, 掳, 掴, 掷, 掸, 掺, 揄, 揉, 揍, 描, 提, 插, 握, 揣, 揩, 揪, + 揭, 援, 揶, 揽, 搀, 搁, 搂, 搅, 搏, 搐, 搓, 搔, 搜, 搞, 搡, 搧, 搪, 搬, 搭, 携, 搽, 摁, 摄, 摆, 摇, + 摈, 摊, 摒, 摔, 摘, 摞, 摧, 摩, 摸, 摹, 撂, 撅, 撇, 撑, 撒, 撕, 撞, 撤, 撩, 撬, 播, 撮, 撰, 撵, 撸, + 撺, 撼, 擀, 擂, 擅, 操, 擎, 擒, 擘, 擞, 擢, 擦, 攀, 攒, 攘, 攥, 攫, 支, 收, 攸, 改, 攻, 放, 政, 故, + 效, 敌, 敏, 救, 敕, 敖, 教, 敛, 敝, 敞, 敢, 散, 敦, 敬, 数, 敲, 整, 敷, 文, 斋, 斌, 斐, 斑, 斓, 斗, + 料, 斛, 斜, 斟, 斡, 斤, 斥, 斧, 斩, 断, 斯, 新, 方, 施, 旁, 旅, 旋, 旌, 族, 旖, 旗, 无, 既, 日, 旦, + 旧, 旨, 早, 旬, 旭, 旮, 旯, 旱, 时, 旷, 旺, 旻, 昀, 昂, 昆, 昊, 昌, 明, 昏, 易, 昔, 昕, 昙, 昝, 星, + 映, 春, 昧, 昨, 昭, 是, 昱, 昴, 昵, 昶, 昼, 显, 晃, 晋, 晌, 晏, 晒, 晓, 晔, 晕, 晖, 晗, 晚, 晞, 晟, + 晤, 晦, 晨, 普, 景, 晰, 晴, 晶, 晷, 智, 晾, 暂, 暄, 暇, 暌, 暑, 暖, 暗, 暧, 暨, 暮, 暴, 暹, 暾, 曈, 
+ 曙, 曜, 曝, 曦, 曰, 曲, 曳, 更, 曹, 曼, 曾, 替, 最, 月, 有, 朋, 服, 朐, 朔, 朕, 朗, 望, 朝, 期, 朦, + 木, 未, 末, 本, 札, 术, 朱, 朴, 朵, 机, 朽, 杀, 杂, 权, 杆, 杈, 杉, 李, 杏, 材, 村, 杓, 杖, 杜, 杞, + 束, 杠, 条, 来, 杨, 杭, 杯, 杰, 杳, 杵, 杷, 松, 板, 极, 构, 枇, 枉, 枋, 析, 枕, 林, 枚, 果, 枝, 枞, + 枢, 枣, 枥, 枪, 枫, 枭, 枯, 枰, 枳, 架, 枷, 枸, 柃, 柄, 柏, 某, 柑, 柒, 染, 柔, 柘, 柚, 柜, 柞, 柠, + 查, 柩, 柬, 柯, 柱, 柳, 柴, 柿, 栀, 栅, 标, 栈, 栋, 栌, 栎, 栏, 树, 栓, 栖, 栗, 校, 栩, 株, 样, 核, + 根, 格, 栽, 栾, 桁, 桂, 桃, 框, 案, 桉, 桌, 桎, 桐, 桑, 桓, 桔, 桠, 桢, 档, 桥, 桦, 桨, 桩, 桴, 桶, + 桷, 梁, 梅, 梆, 梏, 梓, 梗, 梢, 梦, 梧, 梨, 梭, 梯, 械, 梳, 梵, 检, 棂, 棉, 棋, 棍, 棒, 棕, 棘, 棚, + 棠, 棣, 森, 棱, 棵, 棺, 椁, 椅, 椋, 植, 椎, 椒, 椟, 椤, 椭, 椰, 椴, 椹, 椿, 楂, 楔, 楚, 楞, 楠, 楣, + 楷, 楸, 楼, 概, 榄, 榆, 榈, 榉, 榔, 榕, 榛, 榜, 榨, 榫, 榭, 榴, 榷, 榻, 槃, 槌, 槎, 槐, 槛, 槟, 槭, + 槽, 槿, 樊, 樟, 模, 樨, 横, 樯, 樱, 樵, 樽, 樾, 橄, 橇, 橐, 橘, 橙, 橡, 橱, 檀, 檐, 檗, 檬, 欠, 次, + 欢, 欣, 欧, 欲, 欸, 欺, 款, 歆, 歇, 歉, 歌, 歙, 止, 正, 此, 步, 武, 歧, 歩, 歪, 歹, 死, 歼, 殁, 殃, + 殆, 殇, 殉, 殊, 残, 殒, 殓, 殖, 殚, 殡, 殴, 段, 殷, 殿, 毁, 毂, 毅, 毋, 母, 每, 毒, 毓, 比, 毕, 毗, + 毙, 毛, 毡, 毫, 毯, 毽, 氏, 民, 氓, 气, 氚, 氛, 氟, 氢, 氤, 氦, 氧, 氨, 氪, 氮, 氯, 氰, 氲, 水, 永, + 汀, 汁, 求, 汇, 汉, 汊, 汐, 汕, 汗, 汛, 汝, 汞, 江, 池, 污, 汤, 汨, 汩, 汪, 汰, 汲, 汴, 汶, 汹, 汽, + 汾, 沁, 沂, 沃, 沅, 沈, 沉, 沌, 沏, 沐, 沓, 沙, 沛, 沟, 没, 沢, 沣, 沥, 沦, 沧, 沪, 沫, 沭, 沮, 沱, + 河, 沸, 油, 治, 沼, 沽, 沾, 沿, 泄, 泉, 泊, 泌, 泓, 泔, 法, 泖, 泗, 泛, 泞, 泠, 泡, 波, 泣, 泥, 注, + 泪, 泫, 泮, 泯, 泰, 泱, 泳, 泵, 泷, 泸, 泺, 泻, 泼, 泽, 泾, 洁, 洋, 洒, 洗, 洙, 洛, 洞, 津, 洪, 洮, + 洱, 洲, 洵, 洹, 洺, 活, 洼, 洽, 派, 流, 浃, 浅, 浆, 浇, 浈, 浊, 测, 济, 浏, 浐, 浑, 浒, 浓, 浔, 浙, + 浚, 浜, 浠, 浣, 浦, 浩, 浪, 浮, 浴, 海, 浸, 涂, 涅, 消, 涉, 涌, 涎, 涑, 涓, 涕, 涛, 涝, 涞, 涟, 涠, + 涡, 涣, 涤, 润, 涧, 涨, 涩, 涪, 涮, 涯, 液, 涵, 涸, 涿, 淀, 淄, 淅, 淆, 淇, 淋, 淌, 淑, 淖, 淘, 淝, + 淞, 淡, 淤, 淦, 淫, 淬, 淮, 深, 淳, 混, 淹, 添, 淼, 清, 渊, 渌, 渍, 渎, 渐, 渑, 渔, 渗, 渚, 渝, 渠, + 渡, 渣, 渤, 渥, 温, 渭, 港, 渲, 渴, 游, 渺, 湃, 湄, 湉, 湍, 湎, 湖, 湘, 湛, 湫, 湾, 湿, 溃, 溅, 溆, + 溉, 溏, 源, 溜, 溟, 溢, 溥, 溧, 溪, 溯, 溶, 溺, 滁, 滇, 滋, 滑, 滔, 滕, 滘, 滚, 滞, 满, 滢, 滤, 滥, + 滦, 滨, 滩, 滴, 滹, 漂, 漆, 漉, 漏, 漓, 演, 漕, 漠, 漩, 漪, 漫, 漭, 漯, 漱, 漳, 漾, 潆, 潇, 潋, 潍, + 潘, 潜, 潞, 潢, 潦, 潭, 潮, 潸, 潺, 潼, 澄, 澈, 澍, 澎, 澜, 澡, 澧, 澳, 澶, 激, 濂, 濑, 濒, 濠, 濡, + 濮, 濯, 瀑, 瀚, 瀛, 灌, 灏, 灞, 火, 灭, 灯, 灰, 灵, 灶, 灸, 灼, 灾, 灿, 炀, 炅, 炉, 炊, 炎, 炒, 炔, + 炕, 炖, 炙, 炜, 炫, 炬, 炭, 炮, 炯, 炳, 炷, 炸, 点, 炼, 炽, 烀, 烁, 烂, 烃, 烈, 烊, 烘, 烙, 烛, 烟, + 烤, 烦, 烧, 烨, 烩, 烫, 烬, 热, 烯, 烷, 烹, 烽, 焉, 焊, 焓, 焕, 焖, 焗, 焘, 焙, 焚, 焦, 焯, 焰, 焱, + 然, 煊, 煌, 煎, 煜, 煞, 煤, 煦, 照, 煨, 煮, 煲, 煳, 煽, 熄, 熊, 熏, 熔, 熙, 熟, 熠, 熨, 熬, 熵, 熹, + 燃, 燊, 燎, 燕, 燥, 燮, 爆, 爪, 爬, 爱, 爵, 父, 爷, 爸, 爹, 爽, 片, 版, 牌, 牍, 牒, 牙, 牛, 牟, 牠, + 牡, 牢, 牧, 物, 牲, 牵, 特, 牺, 牾, 犀, 犁, 犄, 犇, 犊, 犒, 犟, 犬, 犯, 状, 犷, 犸, 犹, 狂, 狄, 狈, + 狐, 狒, 狗, 狙, 狞, 狠, 狡, 狩, 独, 狭, 狮, 狰, 狱, 狸, 狼, 猁, 猎, 猖, 猛, 猜, 猝, 猥, 猩, 猪, 猫, + 猬, 献, 猴, 猷, 猹, 猾, 猿, 獒, 獗, 獭, 獾, 玄, 率, 玉, 王, 玑, 玖, 玛, 玟, 玥, 玩, 玫, 玮, 环, 现, + 玲, 玳, 玷, 玹, 玺, 玻, 珀, 珂, 珈, 珉, 珊, 珍, 珏, 珑, 珙, 珞, 珠, 珥, 班, 珮, 珲, 珺, 球, 琅, 理, + 琉, 琊, 琏, 琐, 琛, 琢, 琤, 琥, 琦, 琨, 琪, 琬, 琮, 琰, 琳, 琴, 琵, 琶, 琼, 瑁, 瑄, 瑕, 瑙, 瑚, 瑛, + 瑜, 瑞, 瑟, 瑠, 瑭, 瑰, 瑶, 瑷, 瑾, 璀, 璃, 璇, 璋, 璐, 璞, 璟, 璧, 璨, 瓜, 瓢, 瓣, 瓦, 瓮, 瓯, 瓶, + 瓷, 甄, 甘, 甚, 甜, 生, 甥, 用, 甩, 甫, 甬, 甭, 田, 由, 甲, 申, 电, 男, 甸, 町, 画, 畅, 畈, 畊, 界, + 畏, 畔, 留, 畜, 略, 番, 畴, 畸, 畿, 疃, 疆, 疏, 疑, 疖, 疗, 疙, 疚, 疝, 疟, 疡, 疣, 疤, 疫, 疮, 疯, + 疱, 疲, 疴, 疵, 疸, 疹, 疼, 疽, 疾, 病, 症, 痉, 痊, 痍, 痒, 痔, 痕, 痘, 痛, 痞, 痢, 痣, 痧, 痨, 痪, + 痫, 痰, 痱, 痴, 痹, 痼, 瘀, 瘁, 瘙, 瘟, 瘠, 瘢, 瘤, 瘦, 瘩, 瘪, 瘫, 瘳, 瘴, 瘸, 瘾, 癌, 癖, 癜, 癞, + 癣, 癫, 登, 白, 百, 皂, 的, 皆, 皇, 皋, 皎, 皑, 皓, 皖, 皙, 皮, 皱, 皿, 盂, 盅, 盆, 盈, 益, 盎, 盏, + 盐, 监, 盒, 盔, 盖, 盗, 盘, 盛, 盟, 目, 盯, 盱, 盲, 直, 相, 盹, 盼, 盾, 省, 眈, 眉, 看, 眙, 真, 眠, + 眨, 眩, 眬, 眯, 眶, 眷, 眸, 眺, 眼, 着, 睁, 睇, 睐, 睑, 睛, 睡, 睢, 督, 睦, 睫, 睬, 睹, 睽, 睾, 睿, + 瞄, 瞅, 瞌, 瞎, 瞑, 瞒, 瞟, 瞠, 瞥, 瞧, 瞩, 瞪, 瞬, 瞭, 瞰, 瞳, 瞻, 瞿, 矍, 矗, 矛, 矜, 矢, 矣, 知, + 矩, 矫, 矬, 短, 矮, 石, 矶, 矸, 矽, 矾, 矿, 砀, 码, 砂, 砌, 砍, 砒, 研, 砖, 砚, 砝, 砣, 砥, 砭, 砰, + 破, 砷, 砸, 砺, 砼, 砾, 础, 硅, 硌, 硒, 硕, 硖, 硚, 硝, 硫, 硬, 确, 硼, 碉, 碌, 碍, 碎, 碑, 碓, 碗, + 碘, 碚, 碜, 
碟, 碣, 碧, 碰, 碱, 碳, 碴, 碾, 磁, 磅, 磊, 磋, 磐, 磕, 磨, 磴, 磷, 磺, 礁, 示, 礼, 社, + 祀, 祁, 祈, 祉, 祎, 祐, 祖, 祚, 祛, 祝, 神, 祟, 祠, 祢, 祥, 票, 祭, 祯, 祷, 祸, 祺, 禀, 禁, 禄, 禅, + 福, 禧, 禹, 禺, 离, 禽, 禾, 秀, 私, 秃, 秆, 秉, 秋, 种, 科, 秒, 秘, 租, 秣, 秤, 秦, 秧, 秩, 积, 称, + 秸, 移, 秽, 稀, 程, 稍, 税, 稔, 稚, 稞, 稠, 稣, 稳, 稷, 稹, 稻, 稼, 稽, 稿, 穆, 穗, 穴, 究, 穷, 穹, + 空, 穿, 突, 窃, 窄, 窈, 窍, 窑, 窒, 窕, 窖, 窗, 窘, 窜, 窝, 窟, 窠, 窥, 窦, 窨, 窿, 立, 竖, 站, 竞, + 竟, 章, 竣, 童, 竭, 端, 竹, 竺, 竽, 竿, 笃, 笆, 笈, 笋, 笑, 笔, 笙, 笛, 笠, 符, 笨, 第, 笳, 笸, 笼, + 等, 筋, 筏, 筐, 筑, 筒, 答, 策, 筛, 筝, 筠, 筱, 筵, 筷, 筹, 签, 简, 箍, 箔, 箕, 算, 管, 箩, 箫, 箭, + 箱, 箴, 篁, 篆, 篇, 篑, 篓, 篝, 篡, 篦, 篪, 篮, 篱, 篷, 篼, 簇, 簋, 簧, 簪, 簸, 簿, 籁, 籍, 米, 类, + 籼, 籽, 粉, 粑, 粒, 粕, 粗, 粘, 粟, 粤, 粥, 粪, 粮, 粱, 粲, 粳, 粹, 粼, 粽, 精, 糊, 糕, 糖, 糗, 糙, + 糟, 糠, 糯, 系, 紊, 素, 索, 紧, 紫, 累, 絮, 綦, 繁, 纂, 纠, 纡, 红, 纣, 纤, 约, 级, 纨, 纪, 纫, 纬, + 纭, 纯, 纰, 纱, 纲, 纳, 纵, 纶, 纷, 纸, 纹, 纺, 纽, 纾, 线, 绀, 练, 组, 绅, 细, 织, 终, 绉, 绊, 绋, + 绌, 绍, 绎, 经, 绑, 绒, 结, 绔, 绕, 绘, 给, 绚, 绛, 络, 绝, 绞, 统, 绢, 绣, 绥, 继, 绩, 绪, 绫, 续, + 绮, 绯, 绰, 绳, 维, 绵, 绷, 绸, 绻, 综, 绽, 绿, 缀, 缄, 缅, 缆, 缇, 缉, 缎, 缓, 缔, 缕, 编, 缘, 缙, + 缚, 缛, 缜, 缝, 缠, 缢, 缤, 缨, 缩, 缪, 缬, 缭, 缮, 缰, 缱, 缴, 缸, 缺, 罂, 罄, 罐, 网, 罔, 罕, 罗, + 罚, 罡, 罢, 罩, 罪, 置, 署, 罹, 羁, 羊, 羌, 美, 羔, 羚, 羞, 羡, 群, 羧, 羯, 羲, 羸, 羹, 羽, 羿, 翁, + 翅, 翊, 翌, 翎, 翔, 翘, 翟, 翠, 翡, 翩, 翰, 翱, 翻, 翼, 耀, 老, 考, 耄, 者, 耆, 耋, 而, 耍, 耐, 耒, + 耕, 耗, 耘, 耙, 耜, 耪, 耳, 耶, 耷, 耸, 耻, 耽, 耿, 聂, 聆, 聊, 聋, 职, 联, 聘, 聚, 聪, 肃, 肆, 肇, + 肉, 肋, 肌, 肖, 肘, 肚, 肛, 肝, 肠, 股, 肢, 肤, 肥, 肩, 肪, 肮, 肯, 肱, 育, 肴, 肺, 肾, 肿, 胀, 胁, + 胃, 胆, 背, 胎, 胖, 胗, 胚, 胛, 胜, 胞, 胡, 胤, 胥, 胧, 胫, 胭, 胯, 胰, 胱, 胳, 胶, 胸, 胺, 能, 脂, + 脆, 脉, 脊, 脍, 脏, 脐, 脑, 脓, 脖, 脚, 脯, 脱, 脸, 脾, 腆, 腈, 腊, 腋, 腌, 腐, 腑, 腓, 腔, 腕, 腥, + 腩, 腭, 腮, 腰, 腱, 腴, 腹, 腺, 腻, 腼, 腾, 腿, 膀, 膈, 膊, 膏, 膑, 膛, 膜, 膝, 膨, 膳, 膺, 臀, 臂, + 臃, 臆, 臊, 臣, 臧, 自, 臬, 臭, 至, 致, 臻, 臼, 舀, 舅, 舆, 舌, 舍, 舐, 舒, 舔, 舛, 舜, 舞, 舟, 航, + 舫, 般, 舰, 舱, 舵, 舶, 舷, 舸, 船, 艇, 艋, 艘, 艮, 良, 艰, 色, 艳, 艺, 艾, 艿, 节, 芊, 芋, 芍, 芒, + 芗, 芙, 芜, 芝, 芥, 芦, 芩, 芪, 芬, 芭, 芮, 芯, 花, 芳, 芷, 芸, 芹, 芽, 芾, 苇, 苋, 苍, 苏, 苑, 苓, + 苔, 苗, 苛, 苞, 苟, 苡, 苣, 若, 苦, 苫, 苯, 英, 苷, 苹, 茁, 茂, 范, 茄, 茅, 茆, 茉, 茌, 茎, 茗, 茛, + 茜, 茧, 茨, 茫, 茬, 茯, 茱, 茳, 茴, 茵, 茶, 茸, 茹, 茼, 荀, 荃, 荆, 荇, 草, 荏, 荐, 荒, 荔, 荚, 荛, + 荞, 荟, 荠, 荡, 荣, 荤, 荧, 荨, 荫, 药, 荷, 荸, 荻, 荼, 莅, 莆, 莉, 莎, 莒, 莓, 莘, 莜, 莞, 莠, 莪, + 莫, 莱, 莲, 莴, 获, 莹, 莺, 莽, 菀, 菁, 菅, 菇, 菊, 菌, 菏, 菖, 菘, 菜, 菠, 菡, 菩, 菱, 菲, 萃, 萄, + 萋, 萌, 萍, 萎, 萝, 萤, 营, 萦, 萧, 萨, 萱, 萸, 落, 葆, 著, 葚, 葛, 葡, 董, 葩, 葫, 葬, 葱, 葳, 葵, + 葺, 蒂, 蒋, 蒙, 蒜, 蒯, 蒲, 蒸, 蒿, 蓁, 蓄, 蓉, 蓓, 蓝, 蓟, 蓥, 蓦, 蓬, 蓼, 蔑, 蔓, 蔗, 蔚, 蔡, 蔫, + 蔬, 蔷, 蔺, 蔻, 蔼, 蔽, 蕃, 蕉, 蕊, 蕙, 蕨, 蕲, 蕴, 蕾, 薄, 薇, 薏, 薛, 薪, 薯, 薰, 薷, 藁, 藉, 藏, + 藐, 藓, 藕, 藜, 藠, 藤, 藩, 藻, 藿, 蘑, 蘸, 虎, 虏, 虐, 虑, 虔, 虚, 虞, 虫, 虱, 虹, 虻, 虽, 虾, 蚀, + 蚁, 蚂, 蚊, 蚌, 蚓, 蚕, 蚝, 蚣, 蚤, 蚪, 蚬, 蚯, 蚱, 蚴, 蛀, 蛆, 蛇, 蛉, 蛊, 蛋, 蛎, 蛐, 蛔, 蛙, 蛛, + 蛟, 蛤, 蛮, 蛰, 蛳, 蛹, 蛾, 蜀, 蜂, 蜃, 蜇, 蜈, 蜊, 蜍, 蜒, 蜓, 蜕, 蜗, 蜘, 蜚, 蜜, 蜡, 蜢, 蜥, 蜱, + 蜴, 蜷, 蜻, 蜿, 蝇, 蝈, 蝉, 蝌, 蝎, 蝗, 蝙, 蝠, 蝮, 蝴, 蝶, 蝽, 螂, 螃, 螈, 融, 螨, 螳, 螺, 蟀, 蟆, + 蟊, 蟋, 蟑, 蟒, 蟠, 蟹, 蟾, 蠊, 蠕, 蠡, 蠢, 血, 衅, 行, 衍, 衔, 街, 衙, 衡, 衢, 衣, 补, 表, 衩, 衫, + 衬, 衮, 衰, 衲, 衷, 袁, 袂, 袄, 袅, 袈, 袋, 袍, 袒, 袖, 袜, 被, 袭, 袱, 裁, 裂, 装, 裆, 裔, 裕, 裘, + 裙, 裟, 裤, 裨, 裱, 裳, 裴, 裸, 裹, 褂, 褐, 褒, 褓, 褔, 褚, 褛, 褥, 褪, 褴, 褶, 襁, 襄, 襟, 西, 要, + 覃, 覆, 见, 观, 规, 觅, 视, 览, 觉, 觊, 觎, 觐, 觑, 角, 觞, 解, 觥, 触, 言, 訾, 詹, 誉, 誓, 警, 譬, + 计, 订, 讣, 认, 讥, 讧, 讨, 让, 讪, 训, 议, 讯, 记, 讲, 讳, 讴, 讶, 讷, 许, 讹, 论, 讼, 讽, 设, 访, + 诀, 证, 诃, 评, 诅, 识, 诈, 诉, 诊, 诋, 词, 诏, 译, 诓, 试, 诗, 诘, 诙, 诚, 诛, 话, 诞, 诟, 诠, 诡, + 询, 诣, 诤, 该, 详, 诧, 诩, 诫, 诬, 语, 误, 诱, 诲, 说, 诵, 诶, 请, 诸, 诹, 诺, 读, 诽, 课, 诿, 谀, + 谁, 调, 谅, 谆, 谈, 谊, 谋, 谌, 谍, 谎, 谏, 谐, 谑, 谓, 谕, 谖, 谘, 谙, 谚, 谛, 谜, 谟, 谢, 谣, 谤, + 谦, 谧, 谨, 谩, 谬, 谭, 谮, 谯, 谱, 谴, 谶, 谷, 豁, 豆, 豇, 豉, 豌, 豚, 象, 豢, 豪, 豫, 豹, 豺, 貂, + 貅, 貉, 貌, 貔, 贝, 贞, 负, 贡, 财, 责, 贤, 败, 账, 货, 质, 贩, 贪, 贫, 贬, 购, 贮, 贯, 贰, 贱, 贲, + 贴, 贵, 贷, 贸, 费, 贺, 贻, 贼, 贾, 贿, 赁, 赂, 赃, 资, 赅, 赈, 赉, 赊, 赋, 赌, 赎, 赏, 赐, 赓, 赔, + 赖, 赘, 赚, 赛, 赝, 赞, 赠, 
赡, 赢, 赣, 赤, 赦, 赫, 走, 赳, 赴, 赵, 赶, 起, 趁, 超, 越, 趋, 趟, 趣, + 足, 趴, 趵, 趸, 趺, 趾, 跃, 跄, 跆, 跋, 跌, 跎, 跑, 跚, 跛, 距, 跟, 跤, 跨, 跪, 跬, 路, 跳, 践, 跶, + 跷, 跹, 跺, 跻, 踉, 踊, 踌, 踏, 踝, 踞, 踢, 踩, 踪, 踮, 踯, 踱, 踵, 踹, 踺, 蹁, 蹂, 蹄, 蹈, 蹉, 蹊, + 蹋, 蹒, 蹚, 蹦, 蹩, 蹬, 蹭, 蹲, 蹴, 蹶, 蹼, 蹿, 躁, 躅, 躇, 躏, 身, 躬, 躯, 躲, 躺, 车, 轧, 轨, 轩, + 轫, 转, 轮, 软, 轰, 轱, 轲, 轳, 轴, 轶, 轸, 轻, 轼, 载, 轿, 较, 辄, 辅, 辆, 辈, 辉, 辊, 辍, 辐, 辑, + 输, 辕, 辖, 辗, 辘, 辙, 辛, 辜, 辞, 辟, 辣, 辨, 辩, 辫, 辰, 辱, 边, 辽, 达, 迁, 迂, 迄, 迅, 过, 迈, + 迎, 运, 近, 返, 还, 这, 进, 远, 违, 连, 迟, 迢, 迥, 迦, 迩, 迪, 迫, 迭, 述, 迷, 迸, 迹, 追, 退, 送, + 适, 逃, 逅, 逆, 选, 逊, 逋, 逍, 透, 逐, 逑, 递, 途, 逗, 通, 逛, 逝, 逞, 速, 造, 逡, 逢, 逮, 逯, 逵, + 逸, 逻, 逼, 逾, 遁, 遂, 遇, 遍, 遏, 遐, 遑, 道, 遗, 遛, 遢, 遣, 遥, 遨, 遭, 遮, 遴, 遵, 避, 邀, 邂, + 邃, 邋, 邑, 邓, 邕, 邙, 邛, 邝, 邡, 邢, 那, 邦, 邪, 邬, 邮, 邯, 邰, 邱, 邳, 邵, 邸, 邹, 邺, 邻, 郁, + 郅, 郇, 郊, 郎, 郑, 郓, 郜, 郝, 郡, 郧, 部, 郫, 郭, 郯, 郴, 郸, 都, 鄂, 鄙, 鄞, 鄢, 鄱, 酉, 酊, 酋, + 酌, 配, 酐, 酒, 酗, 酚, 酝, 酞, 酣, 酥, 酩, 酪, 酬, 酮, 酯, 酰, 酱, 酵, 酶, 酷, 酸, 酿, 醇, 醉, 醋, + 醍, 醐, 醒, 醛, 醺, 采, 釉, 释, 里, 重, 野, 量, 金, 釜, 鉴, 銮, 鏖, 鑫, 钇, 针, 钉, 钊, 钎, 钏, 钐, + 钒, 钓, 钗, 钙, 钛, 钜, 钝, 钞, 钟, 钠, 钢, 钣, 钥, 钦, 钧, 钨, 钩, 钮, 钯, 钰, 钱, 钲, 钳, 钴, 钵, + 钻, 钼, 钾, 钿, 铀, 铁, 铂, 铃, 铄, 铅, 铆, 铉, 铋, 铍, 铎, 铐, 铑, 铖, 铛, 铜, 铝, 铟, 铠, 铡, 铣, + 铤, 铧, 铨, 铩, 铬, 铭, 铮, 铰, 铲, 银, 铷, 铸, 铺, 链, 铿, 销, 锁, 锂, 锄, 锅, 锆, 锈, 锉, 锋, 锌, + 锏, 锐, 锑, 锒, 错, 锚, 锟, 锡, 锢, 锣, 锤, 锥, 锦, 锨, 锭, 键, 锯, 锰, 锲, 锴, 锵, 锷, 锹, 锻, 镀, + 镁, 镂, 镇, 镉, 镊, 镌, 镍, 镏, 镐, 镑, 镔, 镕, 镖, 镜, 镣, 镭, 镯, 镰, 镳, 镶, 长, 门, 闩, 闪, 闫, + 闭, 问, 闯, 闰, 闲, 闳, 间, 闵, 闷, 闸, 闹, 闺, 闻, 闽, 闾, 阀, 阁, 阂, 阄, 阅, 阆, 阉, 阎, 阐, 阑, + 阔, 阕, 阖, 阙, 阚, 阜, 队, 阡, 阪, 阮, 阱, 防, 阳, 阴, 阵, 阶, 阻, 阿, 陀, 陂, 附, 际, 陆, 陇, 陈, + 陉, 陋, 陌, 降, 限, 陕, 陛, 陡, 院, 除, 陨, 险, 陪, 陬, 陵, 陶, 陷, 隅, 隆, 隋, 隍, 随, 隐, 隔, 隗, + 隘, 隙, 障, 隧, 隶, 隼, 隽, 难, 雀, 雁, 雄, 雅, 集, 雇, 雉, 雌, 雍, 雏, 雒, 雕, 雨, 雪, 雯, 雳, 零, + 雷, 雹, 雾, 需, 霁, 霄, 霆, 震, 霈, 霉, 霍, 霎, 霏, 霓, 霖, 霜, 霞, 霪, 露, 霸, 霹, 霾, 靑, 青, 靓, + 靖, 静, 靛, 非, 靠, 靡, 面, 革, 靳, 靴, 靶, 鞅, 鞋, 鞍, 鞑, 鞘, 鞠, 鞭, 韦, 韧, 韩, 韫, 韬, 韭, 音, + 韵, 韶, 页, 顶, 顷, 项, 顺, 须, 顽, 顾, 顿, 颀, 颁, 颂, 预, 颅, 领, 颇, 颈, 颊, 颌, 颍, 颐, 频, 颓, + 颖, 颗, 题, 颚, 颜, 额, 颠, 颢, 颤, 颦, 颧, 风, 飒, 飓, 飘, 飙, 飚, 飞, 食, 飧, 餍, 餐, 餮, 饕, 饥, + 饨, 饪, 饭, 饮, 饯, 饰, 饱, 饲, 饴, 饵, 饶, 饷, 饺, 饼, 饽, 饿, 馀, 馁, 馄, 馅, 馆, 馈, 馊, 馋, 馍, + 馏, 馑, 馒, 馕, 首, 馗, 香, 馥, 馨, 马, 驭, 驮, 驯, 驰, 驱, 驳, 驴, 驶, 驷, 驸, 驹, 驻, 驼, 驾, 驿, + 骁, 骂, 骄, 骅, 骆, 骇, 骈, 骊, 骋, 验, 骏, 骐, 骑, 骓, 骗, 骚, 骛, 骜, 骝, 骞, 骠, 骡, 骤, 骥, 骨, + 骰, 骷, 骸, 骺, 骼, 髂, 髅, 髋, 髌, 髓, 高, 髦, 髯, 鬃, 鬓, 鬟, 鬼, 魁, 魂, 魄, 魅, 魇, 魉, 魍, 魏, + 魔, 魟, 鱼, 鱿, 鲁, 鲅, 鲈, 鲍, 鲑, 鲜, 鲟, 鲠, 鲢, 鲤, 鲨, 鲫, 鲭, 鲳, 鲶, 鲷, 鲸, 鲼, 鳃, 鳄, 鳅, + 鳌, 鳍, 鳕, 鳖, 鳗, 鳝, 鳞, 鳟, 鸟, 鸠, 鸡, 鸢, 鸣, 鸥, 鸦, 鸩, 鸪, 鸫, 鸭, 鸯, 鸳, 鸵, 鸽, 鸾, 鸿, + 鹁, 鹂, 鹃, 鹅, 鹉, 鹊, 鹌, 鹏, 鹑, 鹜, 鹞, 鹤, 鹦, 鹧, 鹫, 鹭, 鹰, 鹳, 鹿, 麂, 麋, 麒, 麓, 麝, 麟, + 麦, 麸, 麻, 麾, 黄, 黍, 黎, 黏, 黑, 黔, 默, 黛, 黝, 黟, 黯, 鼎, 鼓, 鼠, 鼬, 鼹, 鼻, 鼾, 齐, 齿, 龃, + 龄, 龅, 龈, 龉, 龊, 龌, 龙, 龚, 龟, "\U0002B5AF", "\U0002B689"] + encoder_params: + header: {full_spec: nemo.collections.asr.JasperEncoder} + init_params: + activation: relu + conv_mask: true + feat_in: 64 + jasper: + - dilation: [1] + dropout: 0.0 + filters: 256 + kernel: [33] + repeat: 1 + residual: false + separable: true + stride: [2] + - dilation: [1] + dropout: 0.0 + filters: 256 + kernel: [33] + repeat: 5 + residual: true + separable: true + stride: [1] + - dilation: [1] + dropout: 0.0 + filters: 256 + kernel: [33] + repeat: 5 + residual: true + separable: true + stride: [1] + - dilation: [1] + dropout: 0.0 + filters: 256 + kernel: [33] + repeat: 5 + residual: true + separable: true + stride: [1] + - dilation: [1] + dropout: 0.0 + filters: 256 + kernel: [39] + repeat: 5 + residual: true + separable: true + stride: [1] + - dilation: [1] + dropout: 0.0 + filters: 256 + kernel: [39] + repeat: 5 + residual: true + separable: true + stride: [1] + - 
dilation: [1] + dropout: 0.0 + filters: 256 + kernel: [39] + repeat: 5 + residual: true + separable: true + stride: [1] + - dilation: [1] + dropout: 0.0 + filters: 512 + kernel: [51] + repeat: 5 + residual: true + separable: true + stride: [1] + - dilation: [1] + dropout: 0.0 + filters: 512 + kernel: [51] + repeat: 5 + residual: true + separable: true + stride: [1] + - dilation: [1] + dropout: 0.0 + filters: 512 + kernel: [51] + repeat: 5 + residual: true + separable: true + stride: [1] + - dilation: [1] + dropout: 0.0 + filters: 512 + kernel: [63] + repeat: 5 + residual: true + separable: true + stride: [1] + - dilation: [1] + dropout: 0.0 + filters: 512 + kernel: [63] + repeat: 5 + residual: true + separable: true + stride: [1] + - dilation: [1] + dropout: 0.0 + filters: 512 + kernel: [63] + repeat: 5 + residual: true + separable: true + stride: [1] + - dilation: [1] + dropout: 0.0 + filters: 512 + kernel: [75] + repeat: 5 + residual: true + separable: true + stride: [1] + - dilation: [1] + dropout: 0.0 + filters: 512 + kernel: [75] + repeat: 5 + residual: true + separable: true + stride: [1] + - dilation: [1] + dropout: 0.0 + filters: 512 + kernel: [75] + repeat: 5 + residual: true + separable: true + stride: [1] + - dilation: [2] + dropout: 0.0 + filters: 512 + kernel: [87] + repeat: 1 + residual: false + separable: true + stride: [1] + - dilation: [1] + dropout: 0.0 + filters: 1024 + kernel: [1] + repeat: 1 + residual: false + stride: [1] + preprocessor_params: + header: {full_spec: nemo.collections.asr.AudioToMelSpectrogramPreprocessor} + init_params: {dither: 1e-05, features: 64, n_fft: 512, normalize: per_feature, + pad_to: 16, stft_conv: true, window: hann, window_size: 0.02, window_stride: 0.01} + spec_augment_params: + header: {full_spec: nemo.collections.asr.SpectrogramAugmentation} + init_params: {rect_freq: 50, rect_masks: 5, rect_time: 120} diff --git a/examples/asr/configs/quartznet15x5.yaml b/examples/asr/configs/quartznet15x5.yaml index f3a4507ff41c..3e17841d774e 100644 --- a/examples/asr/configs/quartznet15x5.yaml +++ b/examples/asr/configs/quartznet15x5.yaml @@ -1,198 +1,167 @@ -model: "QuartzNet" -sample_rate: 16000 - -AudioToTextDataLayer: - max_duration: 16.7 - trim_silence: true - - train: - shuffle: true - - eval: - shuffle: false - max_duration: null - -AudioToMelSpectrogramPreprocessor: - window_size: 0.02 - window_stride: 0.01 - window: "hann" - normalize: "per_feature" - n_fft: 512 - features: 64 - dither: 0.00001 - pad_to: 16 - stft_conv: true - -SpectrogramAugmentation: - rect_masks: 5 - rect_time: 120 - rect_freq: 50 - -JasperEncoder: - activation: "relu" - conv_mask: true - - jasper: - - filters: 256 - repeat: 1 - kernel: [33] - stride: [2] - dilation: [1] - dropout: 0.0 - residual: false - separable: true - - - filters: 256 - repeat: 5 - kernel: [33] - stride: [1] - dilation: [1] - dropout: 0.0 - residual: true - separable: true - - - filters: 256 - repeat: 5 - kernel: [33] - stride: [1] - dilation: [1] - dropout: 0.0 - residual: true - separable: true - - - filters: 256 - repeat: 5 - kernel: [33] - stride: [1] - dilation: [1] - dropout: 0.0 - residual: true - separable: true - - - filters: 256 - repeat: 5 - kernel: [39] - stride: [1] - dilation: [1] - dropout: 0.0 - residual: true - separable: true - - - filters: 256 - repeat: 5 - kernel: [39] - stride: [1] - dilation: [1] - dropout: 0.0 - residual: true - separable: true - - - filters: 256 - repeat: 5 - kernel: [39] - stride: [1] - dilation: [1] - dropout: 0.0 - residual: true - separable: true - - - 
filters: 512 - repeat: 5 - kernel: [51] - stride: [1] - dilation: [1] - dropout: 0.0 - residual: true - separable: true - - - filters: 512 - repeat: 5 - kernel: [51] - stride: [1] - dilation: [1] - dropout: 0.0 - residual: true - separable: true - - - filters: 512 - repeat: 5 - kernel: [51] - stride: [1] - dilation: [1] - dropout: 0.0 - residual: true - separable: true - - - filters: 512 - repeat: 5 - kernel: [63] - stride: [1] - dilation: [1] - dropout: 0.0 - residual: true - separable: true - - - filters: 512 - repeat: 5 - kernel: [63] - stride: [1] - dilation: [1] - dropout: 0.0 - residual: true - separable: true - - - filters: 512 - repeat: 5 - kernel: [63] - stride: [1] - dilation: [1] - dropout: 0.0 - residual: true - separable: true - - - filters: 512 - repeat: 5 - kernel: [75] - stride: [1] - dilation: [1] - dropout: 0.0 - residual: true - separable: true - - - filters: 512 - repeat: 5 - kernel: [75] - stride: [1] - dilation: [1] - dropout: 0.0 - residual: true - separable: true - - - filters: 512 - repeat: 5 - kernel: [75] - stride: [1] - dilation: [1] - dropout: 0.0 - residual: true - separable: true - - - filters: 512 - repeat: 1 - kernel: [87] - stride: [1] - dilation: [2] - dropout: 0.0 - residual: false - separable: true - - - filters: 1024 - repeat: 1 - kernel: [1] - stride: [1] - dilation: [1] - dropout: 0.0 - residual: false - -labels: [" ", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", - "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "'"] +header: {collection_type: asr, collection_version: null, full_spec: nemo.collections.asr.models.asrconvctcmodel.QuartzNet, + nemo_core_version: 0.11.0b0} +init_params: + decoder_params: + header: {full_spec: nemo.collections.asr.JasperDecoderForCTC} + init_params: + feat_in: 1024 + num_classes: 28 + vocabulary: [' ', a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, + u, v, w, x, y, z, ''''] + encoder_params: + header: {full_spec: nemo.collections.asr.JasperEncoder} + init_params: + activation: relu + conv_mask: true + feat_in: 64 + jasper: + - dilation: [1] + dropout: 0.0 + filters: 256 + kernel: [33] + repeat: 1 + residual: false + separable: true + stride: [2] + - dilation: [1] + dropout: 0.0 + filters: 256 + kernel: [33] + repeat: 5 + residual: true + separable: true + stride: [1] + - dilation: [1] + dropout: 0.0 + filters: 256 + kernel: [33] + repeat: 5 + residual: true + separable: true + stride: [1] + - dilation: [1] + dropout: 0.0 + filters: 256 + kernel: [33] + repeat: 5 + residual: true + separable: true + stride: [1] + - dilation: [1] + dropout: 0.0 + filters: 256 + kernel: [39] + repeat: 5 + residual: true + separable: true + stride: [1] + - dilation: [1] + dropout: 0.0 + filters: 256 + kernel: [39] + repeat: 5 + residual: true + separable: true + stride: [1] + - dilation: [1] + dropout: 0.0 + filters: 256 + kernel: [39] + repeat: 5 + residual: true + separable: true + stride: [1] + - dilation: [1] + dropout: 0.0 + filters: 512 + kernel: [51] + repeat: 5 + residual: true + separable: true + stride: [1] + - dilation: [1] + dropout: 0.0 + filters: 512 + kernel: [51] + repeat: 5 + residual: true + separable: true + stride: [1] + - dilation: [1] + dropout: 0.0 + filters: 512 + kernel: [51] + repeat: 5 + residual: true + separable: true + stride: [1] + - dilation: [1] + dropout: 0.0 + filters: 512 + kernel: [63] + repeat: 5 + residual: true + separable: true + stride: [1] + - dilation: [1] + dropout: 0.0 + filters: 512 + kernel: [63] + repeat: 5 + residual: true + separable: true + 
stride: [1] + - dilation: [1] + dropout: 0.0 + filters: 512 + kernel: [63] + repeat: 5 + residual: true + separable: true + stride: [1] + - dilation: [1] + dropout: 0.0 + filters: 512 + kernel: [75] + repeat: 5 + residual: true + separable: true + stride: [1] + - dilation: [1] + dropout: 0.0 + filters: 512 + kernel: [75] + repeat: 5 + residual: true + separable: true + stride: [1] + - dilation: [1] + dropout: 0.0 + filters: 512 + kernel: [75] + repeat: 5 + residual: true + separable: true + stride: [1] + - dilation: [2] + dropout: 0.0 + filters: 512 + kernel: [87] + repeat: 1 + residual: false + separable: true + stride: [1] + - dilation: [1] + dropout: 0.0 + filters: 1024 + kernel: [1] + repeat: 1 + residual: false + stride: [1] + preprocessor_params: + header: {full_spec: nemo.collections.asr.AudioToMelSpectrogramPreprocessor} + init_params: {dither: 1e-05, features: 64, n_fft: 512, normalize: per_feature, + pad_to: 16, stft_conv: true, window: hann, window_size: 0.02, window_stride: 0.01} + spec_augment_params: + header: {full_spec: nemo.collections.asr.SpectrogramAugmentation} + init_params: {rect_freq: 50, rect_masks: 5, rect_time: 120} diff --git a/examples/asr/configs/quartznet15x5_8kHz.yaml b/examples/asr/configs/quartznet15x5_8kHz.yaml new file mode 100644 index 000000000000..3bbe1019e460 --- /dev/null +++ b/examples/asr/configs/quartznet15x5_8kHz.yaml @@ -0,0 +1,198 @@ +model: "QuartzNet" +sample_rate: 8000 + +AudioToTextDataLayer: + max_duration: 16.7 + trim_silence: true + + train: + shuffle: true + + eval: + shuffle: false + max_duration: null + +AudioToMelSpectrogramPreprocessor: + window_size: 0.02 + window_stride: 0.01 + window: "hann" + normalize: "per_feature" + n_fft: 512 + features: 64 + dither: 0.00001 + pad_to: 16 + stft_conv: true + +SpectrogramAugmentation: + rect_masks: 5 + rect_time: 120 + rect_freq: 50 + +JasperEncoder: + activation: "relu" + conv_mask: true + + jasper: + - filters: 256 + repeat: 1 + kernel: [33] + stride: [2] + dilation: [1] + dropout: 0.0 + residual: false + separable: true + + - filters: 256 + repeat: 5 + kernel: [33] + stride: [1] + dilation: [1] + dropout: 0.0 + residual: true + separable: true + + - filters: 256 + repeat: 5 + kernel: [33] + stride: [1] + dilation: [1] + dropout: 0.0 + residual: true + separable: true + + - filters: 256 + repeat: 5 + kernel: [33] + stride: [1] + dilation: [1] + dropout: 0.0 + residual: true + separable: true + + - filters: 256 + repeat: 5 + kernel: [39] + stride: [1] + dilation: [1] + dropout: 0.0 + residual: true + separable: true + + - filters: 256 + repeat: 5 + kernel: [39] + stride: [1] + dilation: [1] + dropout: 0.0 + residual: true + separable: true + + - filters: 256 + repeat: 5 + kernel: [39] + stride: [1] + dilation: [1] + dropout: 0.0 + residual: true + separable: true + + - filters: 512 + repeat: 5 + kernel: [51] + stride: [1] + dilation: [1] + dropout: 0.0 + residual: true + separable: true + + - filters: 512 + repeat: 5 + kernel: [51] + stride: [1] + dilation: [1] + dropout: 0.0 + residual: true + separable: true + + - filters: 512 + repeat: 5 + kernel: [51] + stride: [1] + dilation: [1] + dropout: 0.0 + residual: true + separable: true + + - filters: 512 + repeat: 5 + kernel: [63] + stride: [1] + dilation: [1] + dropout: 0.0 + residual: true + separable: true + + - filters: 512 + repeat: 5 + kernel: [63] + stride: [1] + dilation: [1] + dropout: 0.0 + residual: true + separable: true + + - filters: 512 + repeat: 5 + kernel: [63] + stride: [1] + dilation: [1] + dropout: 0.0 + residual: true + 
separable: true + + - filters: 512 + repeat: 5 + kernel: [75] + stride: [1] + dilation: [1] + dropout: 0.0 + residual: true + separable: true + + - filters: 512 + repeat: 5 + kernel: [75] + stride: [1] + dilation: [1] + dropout: 0.0 + residual: true + separable: true + + - filters: 512 + repeat: 5 + kernel: [75] + stride: [1] + dilation: [1] + dropout: 0.0 + residual: true + separable: true + + - filters: 512 + repeat: 1 + kernel: [87] + stride: [1] + dilation: [2] + dropout: 0.0 + residual: false + separable: true + + - filters: 1024 + repeat: 1 + kernel: [1] + stride: [1] + dilation: [1] + dropout: 0.0 + residual: false + +labels: [" ", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", + "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "'"] diff --git a/examples/asr/configs/quartznet_speech_commands_3x1_v1.yaml b/examples/asr/configs/quartznet_speech_commands_3x1_v1.yaml new file mode 100644 index 000000000000..8b1d92183eed --- /dev/null +++ b/examples/asr/configs/quartznet_speech_commands_3x1_v1.yaml @@ -0,0 +1,123 @@ +model: "QuartzNet" +sample_rate: &sample_rate 16000 +dropout: &drop 0.0 +repeat: &rep 1 +augment: true +lr_schedule: "PolynomialHoldDecayAnnealing" +lr_warmup_proportion: 0.1 +kernel_size_factor: &kfactor 1.0 + +AudioToSpeechLabelDataLayer: + #sample_rate: *sample_rate + train: + shuffle: true + eval: + shuffle: false + +AudioToMelSpectrogramPreprocessor: + normalize: "per_feature" + window_size: 0.025 + window_stride: 0.01 + window: "hann" + features: &n_mels 64 + frame_splicing: 1 + dither: 0.00001 + stft_conv: true + n_fft: 512 + +AudioToMFCCPreprocessor: + window_size: 0.025 + window_stride: 0.01 + window: "hann" + n_mels: *n_mels + n_mfcc: *n_mels + n_fft: 512 + +AudioAugmentor: + shift: + prob: 1.0 + min_shift_ms: -5.0 + max_shift_ms: 5.0 + white_noise: + prob: 1.0 + min_level: -90 + max_level: -46 + +SpectrogramAugmentation: + freq_masks: 2 + time_masks: 2 + freq_width: 15 + time_width: 25 + rect_masks: 5 + rect_time: 25 + rect_freq: 15 + +JasperEncoder: + feat_in: *n_mels + activation: "relu" + conv_mask: true + + jasper: + - filters: 128 + repeat: 1 + kernel: [11] + stride: [1] + dilation: [1] + dropout: *drop + residual: false + separable: true + kernel_size_factor: *kfactor + + - filters: 64 + repeat: *rep + kernel: [13] + stride: [1] + dilation: [1] + dropout: *drop + residual: true + separable: true + kernel_size_factor: *kfactor + + - filters: 64 + repeat: *rep + kernel: [15] + stride: [1] + dilation: [1] + dropout: *drop + residual: true + separable: true + kernel_size_factor: *kfactor + + - filters: 64 + repeat: *rep + kernel: [17] + stride: [1] + dilation: [1] + dropout: *drop + residual: true + separable: true + kernel_size_factor: *kfactor + + - filters: 128 + repeat: 1 + kernel: [29] + stride: [1] + dilation: [2] + dropout: *drop + residual: false + separable: true + kernel_size_factor: *kfactor + + - filters: 128 + repeat: 1 + kernel: [1] + stride: [1] + dilation: [1] + dropout: *drop + residual: false + +JasperDecoderForClassification: + return_logits: True + pooling_type: 'avg' + +labels: ['bed', 'bird', 'cat', 'dog', 'down', 'eight', 'five', 'four', 'go', 'happy', 'house', 'left', 'marvin', 'nine', 'no', 'off', 'on', 'one', 'right', 'seven', 'sheila', 'six', 'stop', 'three', 'tree', 'two', 'up', 'wow', 'yes', 'zero'] diff --git a/examples/asr/configs/quartznet_speech_commands_3x1_v2.yaml b/examples/asr/configs/quartznet_speech_commands_3x1_v2.yaml new file mode 100644 index 000000000000..c685dd172c39 --- /dev/null +++ 
b/examples/asr/configs/quartznet_speech_commands_3x1_v2.yaml @@ -0,0 +1,123 @@ +model: "QuartzNet" +sample_rate: &sample_rate 16000 +dropout: &drop 0.0 +repeat: &rep 1 +augment: true +lr_schedule: "PolynomialHoldDecayAnnealing" +lr_warmup_proportion: 0.1 +kernel_size_factor: &kfactor 1.0 + +AudioToSpeechLabelDataLayer: + #sample_rate: *sample_rate + train: + shuffle: true + eval: + shuffle: false + +AudioToMelSpectrogramPreprocessor: + normalize: "per_feature" + window_size: 0.025 + window_stride: 0.01 + window: "hann" + features: &n_mels 64 + frame_splicing: 1 + dither: 0.00001 + stft_conv: true + n_fft: 512 + +AudioToMFCCPreprocessor: + window_size: 0.025 + window_stride: 0.01 + window: "hann" + n_mels: *n_mels + n_mfcc: *n_mels + n_fft: 512 + +AudioAugmentor: + shift: + prob: 1.0 + min_shift_ms: -5.0 + max_shift_ms: 5.0 + white_noise: + prob: 1.0 + min_level: -90 + max_level: -46 + +SpectrogramAugmentation: + freq_masks: 2 + time_masks: 2 + freq_width: 15 + time_width: 25 + rect_masks: 5 + rect_time: 25 + rect_freq: 15 + +JasperEncoder: + feat_in: *n_mels + activation: "relu" + conv_mask: true + + jasper: + - filters: 128 + repeat: 1 + kernel: [11] + stride: [1] + dilation: [1] + dropout: *drop + residual: false + separable: true + kernel_size_factor: *kfactor + + - filters: 64 + repeat: *rep + kernel: [13] + stride: [1] + dilation: [1] + dropout: *drop + residual: true + separable: true + kernel_size_factor: *kfactor + + - filters: 64 + repeat: *rep + kernel: [15] + stride: [1] + dilation: [1] + dropout: *drop + residual: true + separable: true + kernel_size_factor: *kfactor + + - filters: 64 + repeat: *rep + kernel: [17] + stride: [1] + dilation: [1] + dropout: *drop + residual: true + separable: true + kernel_size_factor: *kfactor + + - filters: 128 + repeat: 1 + kernel: [29] + stride: [1] + dilation: [2] + dropout: *drop + residual: false + separable: true + kernel_size_factor: *kfactor + + - filters: 128 + repeat: 1 + kernel: [1] + stride: [1] + dilation: [1] + dropout: *drop + residual: false + +JasperDecoderForClassification: + return_logits: True + pooling_type: 'avg' + +labels: ['visual', 'wow', 'learn', 'backward', 'dog', 'two', 'left', 'happy', 'nine', 'go', 'up', 'bed', 'stop', 'one', 'zero', 'tree', 'seven', 'on', 'four', 'bird', 'right', 'eight', 'no', 'six', 'forward', 'house', 'marvin', 'sheila', 'five', 'off', 'three', 'down', 'cat', 'follow', 'yes'] \ No newline at end of file diff --git a/examples/asr/experimental/contextnet.py b/examples/asr/experimental/contextnet.py new file mode 100644 index 000000000000..2857bb7f0b44 --- /dev/null +++ b/examples/asr/experimental/contextnet.py @@ -0,0 +1,323 @@ +# Copyright (C) NVIDIA CORPORATION. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License.**** + +import argparse +import copy +import os +from functools import partial + +from ruamel.yaml import YAML + +import nemo +import nemo.collections.asr as nemo_asr +import nemo.utils.argparse as nm_argparse +from nemo.collections.asr.helpers import monitor_asr_train_progress, process_evaluation_batch, process_evaluation_epoch +from nemo.utils import logging +from nemo.utils.lr_policies import CosineAnnealing + + +def parse_args(): + parser = argparse.ArgumentParser( + parents=[nm_argparse.NemoArgParser()], description='ContextNet', conflict_handler='resolve', + ) + parser.set_defaults( + checkpoint_dir=None, + optimizer="novograd", + batch_size=32, + eval_batch_size=64, + lr=0.01, + weight_decay=0.001, + amp_opt_level="O0", + create_tb_writer=True, + ) + + # Overwrite default args + parser.add_argument( + "--num_epochs", + type=int, + default=None, + required=True, + help="number of epochs to train. You should specify either num_epochs or max_steps", + ) + parser.add_argument( + "--model_config", type=str, required=True, help="model configuration file: model.yaml", + ) + + # Create new args + parser.add_argument("--exp_name", default="ContextNet", type=str) + parser.add_argument("--project", default=None, type=str) + parser.add_argument("--beta1", default=0.95, type=float) + parser.add_argument("--beta2", default=0.5, type=float) + parser.add_argument("--warmup_steps", default=1000, type=int) + parser.add_argument("--warmup_ratio", default=None, type=float) + parser.add_argument('--min_lr', default=1e-5, type=float) + parser.add_argument("--load_dir", default=None, type=str) + parser.add_argument("--synced_bn", action='store_true', help="Use synchronized batch norm") + parser.add_argument("--synced_bn_groupsize", default=0, type=int) + parser.add_argument("--update_freq", default=50, type=int, help="Metrics update freq") + parser.add_argument("--eval_freq", default=1000, type=int, help="Evaluation frequency") + parser.add_argument('--kernel_size_factor', default=1.0, type=float) + + args = parser.parse_args() + if args.max_steps is not None: + raise ValueError("ContextNet uses num_epochs instead of max_steps") + + return args + + +def construct_name(name, lr, batch_size, num_epochs, wd, optimizer, kernel_size_factor): + return "{0}-lr_{1}-bs_{2}-e_{3}-wd_{4}-opt_{5}-kf_{6}".format( + name, lr, batch_size, num_epochs, wd, optimizer, kernel_size_factor + ) + + +def create_all_dags(args, neural_factory): + ''' + creates train and eval dags as well as their callbacks + returns train loss tensor and callbacks''' + + # parse the config files + yaml = YAML(typ="safe") + with open(args.model_config) as f: + contextnet_params = yaml.load(f) + + vocab = contextnet_params['labels'] + sample_rate = contextnet_params['sample_rate'] + + # Calculate num_workers for dataloader + total_cpus = os.cpu_count() + cpu_per_traindl = max(int(total_cpus / neural_factory.world_size), 1) + + # create data layer for training + train_dl_params = copy.deepcopy(contextnet_params["AudioToTextDataLayer"]) + train_dl_params.update(contextnet_params["AudioToTextDataLayer"]["train"]) + del train_dl_params["train"] + del train_dl_params["eval"] + # del train_dl_params["normalize_transcripts"] + + data_layer_train = nemo_asr.AudioToTextDataLayer( + manifest_filepath=args.train_dataset, + sample_rate=sample_rate, + labels=vocab, + batch_size=args.batch_size, + num_workers=cpu_per_traindl, + **train_dl_params, + ) + + N = 
len(data_layer_train) + steps_per_epoch = int(N / (args.batch_size * args.iter_per_step * args.num_gpus)) + + # create separate data layers for eval + # we need separate eval dags for separate eval datasets + # but all other modules in these dags will be shared + + eval_dl_params = copy.deepcopy(contextnet_params["AudioToTextDataLayer"]) + eval_dl_params.update(contextnet_params["AudioToTextDataLayer"]["eval"]) + del eval_dl_params["train"] + del eval_dl_params["eval"] + + data_layers_eval = [] + if args.eval_datasets: + for eval_dataset in args.eval_datasets: + data_layer_eval = nemo_asr.AudioToTextDataLayer( + manifest_filepath=eval_dataset, + sample_rate=sample_rate, + labels=vocab, + batch_size=args.eval_batch_size, + num_workers=cpu_per_traindl, + **eval_dl_params, + ) + + data_layers_eval.append(data_layer_eval) + else: + logging.warning("There were no val datasets passed") + + # create shared modules + + data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor( + sample_rate=sample_rate, **contextnet_params["AudioToMelSpectrogramPreprocessor"], + ) + + # Inject the `kernel_size_factor` kwarg to the ContextNet config + # Skip the last layer as that must be a pointwise kernel + for idx in range(len(contextnet_params["ContextNetEncoder"]["jasper"]) - 1): + contextnet_params["ContextNetEncoder"]["jasper"][idx]["kernel_size_factor"] = args.kernel_size_factor + + # (ContextNet uses the Jasper baseline encoder and decoder) + encoder = nemo_asr.ContextNetEncoder( + feat_in=contextnet_params["AudioToMelSpectrogramPreprocessor"]["features"], + **contextnet_params["ContextNetEncoder"], + ) + + decoder = nemo_asr.JasperDecoderForCTC( + feat_in=contextnet_params["ContextNetEncoder"]["jasper"][-1]["filters"], num_classes=len(vocab), + ) + + ctc_loss = nemo_asr.CTCLossNM(num_classes=len(vocab), zero_infinity=True) + + greedy_decoder = nemo_asr.GreedyCTCDecoder() + + # create augmentation modules (only used for training) if their configs + # are present + + multiply_batch_config = contextnet_params.get('MultiplyBatch', None) + if multiply_batch_config: + multiply_batch = nemo_asr.MultiplyBatch(**multiply_batch_config) + + spectr_augment_config = contextnet_params.get('SpectrogramAugmentation', None) + if spectr_augment_config: + data_spectr_augmentation = nemo_asr.SpectrogramAugmentation(**spectr_augment_config) + + # assemble train DAG + + (audio_signal_t, a_sig_length_t, transcript_t, transcript_len_t,) = data_layer_train() + + processed_signal_t, p_length_t = data_preprocessor(input_signal=audio_signal_t, length=a_sig_length_t) + + if multiply_batch_config: + (processed_signal_t, p_length_t, transcript_t, transcript_len_t,) = multiply_batch( + in_x=processed_signal_t, in_x_len=p_length_t, in_y=transcript_t, in_y_len=transcript_len_t, + ) + + if spectr_augment_config: + processed_signal_t = data_spectr_augmentation(input_spec=processed_signal_t) + + encoded_t, encoded_len_t = encoder(audio_signal=processed_signal_t, length=p_length_t) + log_probs_t = decoder(encoder_output=encoded_t) + predictions_t = greedy_decoder(log_probs=log_probs_t) + loss_t = ctc_loss( + log_probs=log_probs_t, targets=transcript_t, input_length=encoded_len_t, target_length=transcript_len_t, + ) + + # create train callbacks + train_callback = nemo.core.SimpleLossLoggerCallback( + tensors=[loss_t, predictions_t, transcript_t, transcript_len_t], + print_func=partial(monitor_asr_train_progress, labels=vocab), + get_tb_values=lambda x: [["loss", x[0]]], + tb_writer=neural_factory.tb_writer, + step_freq=args.update_freq, + 
) + + callbacks = [train_callback] + + if args.checkpoint_dir or args.load_dir: + chpt_callback = nemo.core.CheckpointCallback( + folder=args.checkpoint_dir, load_from_folder=args.load_dir, step_freq=args.checkpoint_save_freq, + ) + + callbacks.append(chpt_callback) + + # Log training metrics to wandb + if args.project is not None: + wand_callback = nemo.core.WandbCallback( + train_tensors=[loss_t], + wandb_name=args.exp_name, + wandb_project=args.project, + update_freq=args.update_freq, + args=args, + ) + callbacks.append(wand_callback) + + # assemble eval DAGs + for i, eval_dl in enumerate(data_layers_eval): + (audio_signal_e, a_sig_length_e, transcript_e, transcript_len_e,) = eval_dl() + processed_signal_e, p_length_e = data_preprocessor(input_signal=audio_signal_e, length=a_sig_length_e) + encoded_e, encoded_len_e = encoder(audio_signal=processed_signal_e, length=p_length_e) + log_probs_e = decoder(encoder_output=encoded_e) + predictions_e = greedy_decoder(log_probs=log_probs_e) + loss_e = ctc_loss( + log_probs=log_probs_e, targets=transcript_e, input_length=encoded_len_e, target_length=transcript_len_e, + ) + + # create corresponding eval callback + tagname = os.path.basename(args.eval_datasets[i]).split(".")[0] + + eval_callback = nemo.core.EvaluatorCallback( + eval_tensors=[loss_e, predictions_e, transcript_e, transcript_len_e,], + user_iter_callback=partial(process_evaluation_batch, labels=vocab), + user_epochs_done_callback=partial(process_evaluation_epoch, tag=tagname), + eval_step=args.eval_freq, + tb_writer=neural_factory.tb_writer, + ) + + callbacks.append(eval_callback) + + return loss_t, callbacks, steps_per_epoch + + +def main(): + args = parse_args() + + name = construct_name( + args.exp_name, + args.lr, + args.batch_size, + args.num_epochs, + args.weight_decay, + args.optimizer, + args.kernel_size_factor, + ) + work_dir = name + if args.work_dir: + work_dir = os.path.join(args.work_dir, name) + + # instantiate Neural Factory with supported backend + neural_factory = nemo.core.NeuralModuleFactory( + backend=nemo.core.Backend.PyTorch, + local_rank=args.local_rank, + optimization_level=args.amp_opt_level, + log_dir=work_dir, + checkpoint_dir=args.checkpoint_dir, + create_tb_writer=args.create_tb_writer, + files_to_copy=[args.model_config, __file__], + cudnn_benchmark=args.cudnn_benchmark, + tensorboard_dir=args.tensorboard_dir, + ) + args.num_gpus = neural_factory.world_size + + args.checkpoint_dir = neural_factory.checkpoint_dir + + if args.local_rank is not None: + logging.info('Doing ALL GPU') + + # build dags + train_loss, callbacks, steps_per_epoch = create_all_dags(args, neural_factory) + + # train model + neural_factory.train( + tensors_to_optimize=[train_loss], + callbacks=callbacks, + lr_policy=CosineAnnealing( + args.num_epochs * steps_per_epoch, + warmup_steps=args.warmup_steps, + warmup_ratio=args.warmup_ratio, + min_lr=args.min_lr, + ), + optimizer=args.optimizer, + optimization_params={ + "num_epochs": args.num_epochs, + "lr": args.lr, + "betas": (args.beta1, args.beta2), + "weight_decay": args.weight_decay, + "grad_norm_clip": None, + "amp_min_loss_scale": 1e-4, + }, + batches_per_step=args.iter_per_step, + synced_batchnorm=args.synced_bn, + synced_batchnorm_groupsize=args.synced_bn_groupsize, + ) + + +if __name__ == '__main__': + main() diff --git a/examples/asr/experimental/garnet.py b/examples/asr/experimental/garnet.py index bdb99a533eaf..061b0e563fcf 100644 --- a/examples/asr/experimental/garnet.py +++ b/examples/asr/experimental/garnet.py @@ -53,7 +53,7 
@@ def parse_args(): type=int, default=None, required=True, - help="number of epochs to train. You should specify" "either num_epochs or max_steps", + help="number of epochs to train. You should specify either num_epochs or max_steps", ) parser.add_argument( "--model_config", type=str, required=True, help="model configuration file: model.yaml", diff --git a/examples/asr/experimental/garnet_rnnlm.py b/examples/asr/experimental/garnet_rnnlm.py index d2d852300fe8..8a355d3f51dc 100644 --- a/examples/asr/experimental/garnet_rnnlm.py +++ b/examples/asr/experimental/garnet_rnnlm.py @@ -53,7 +53,7 @@ def parse_args(): type=int, default=None, required=True, - help="number of epochs to train. You should specify" "either num_epochs or max_steps", + help="number of epochs to train. You should specify either num_epochs or max_steps", ) parser.add_argument( "--model_config", type=str, required=True, help="model configuration file: model.yaml", diff --git a/examples/asr/jasper_an4.py b/examples/asr/jasper_an4.py index 6f2de721e13b..64daaac586f5 100644 --- a/examples/asr/jasper_an4.py +++ b/examples/asr/jasper_an4.py @@ -1,6 +1,5 @@ # Copyright (c) 2019 NVIDIA Corporation import argparse -import copy import math import os from functools import partial @@ -18,77 +17,71 @@ process_evaluation_epoch, word_error_rate, ) +from nemo.core import NeuralGraph +from nemo.utils import logging from nemo.utils.lr_policies import CosineAnnealing -logging = nemo.logging +def create_dags(model_config_file, vocab, args, nf): -def create_dags(jasper_params, args, nf): - vocab = jasper_params['labels'] - - # build train and eval model - train_dl_params = copy.deepcopy(jasper_params["AudioToTextDataLayer"]) - train_dl_params.update(jasper_params["AudioToTextDataLayer"]["train"]) - del train_dl_params["train"] - del train_dl_params["eval"] - - data_layer = nemo_asr.AudioToTextDataLayer( - manifest_filepath=args.train_dataset, labels=vocab, batch_size=args.batch_size, **train_dl_params, - ) - - num_samples = len(data_layer) - steps_per_epoch = math.ceil(num_samples / (args.batch_size * args.iter_per_step * nf.world_size)) - total_steps = steps_per_epoch * args.num_epochs - logging.info("Train samples=", num_samples, "num_steps=", total_steps) - - data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor( - **jasper_params["AudioToMelSpectrogramPreprocessor"] - ) - - # data_augmentation = nemo_asr.SpectrogramAugmentation( - # **jasper_params['SpectrogramAugmentation'] - # ) - - eval_dl_params = copy.deepcopy(jasper_params["AudioToTextDataLayer"]) - eval_dl_params.update(jasper_params["AudioToTextDataLayer"]["eval"]) - del eval_dl_params["train"] - del eval_dl_params["eval"] - - data_layer_eval = nemo_asr.AudioToTextDataLayer( - manifest_filepath=args.eval_datasets, labels=vocab, batch_size=args.eval_batch_size, **eval_dl_params, - ) + with NeuralGraph() as g0: + # Create a data_layer for training. 
+ data_layer = nemo_asr.AudioToTextDataLayer.import_from_config( + model_config_file, + "AudioToTextDataLayer_train", + overwrite_params={"manifest_filepath": args.train_dataset, "batch_size": args.batch_size}, + ) - num_samples = len(data_layer_eval) - logging.info(f"Eval samples={num_samples}") + num_samples = len(data_layer) + steps_per_epoch = math.ceil(num_samples / (data_layer.batch_size * args.iter_per_step * nf.world_size)) + total_steps = steps_per_epoch * args.num_epochs + logging.info("Train samples=", num_samples, "num_steps=", total_steps) - jasper_encoder = nemo_asr.JasperEncoder(**jasper_params["JasperEncoder"]) + # Create a data_layer for evaluation. + data_layer_eval = nemo_asr.AudioToTextDataLayer.import_from_config( + model_config_file, "AudioToTextDataLayer_eval", overwrite_params={"manifest_filepath": args.eval_datasets}, + ) - jasper_decoder = nemo_asr.JasperDecoderForCTC(num_classes=len(vocab), **jasper_params["JasperDecoderForCTC"]) + num_samples = len(data_layer_eval) + logging.info(f"Eval samples={num_samples}") - ctc_loss = nemo_asr.CTCLossNM(num_classes=len(vocab)) + # Instantiate data processor. + data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor.import_from_config( + model_config_file, "AudioToMelSpectrogramPreprocessor" + ) - greedy_decoder = nemo_asr.GreedyCTCDecoder() + # Instantiate JASPER encoder-decoder modules. + jasper_encoder = nemo_asr.JasperEncoder.import_from_config(model_config_file, "JasperEncoder") + jasper_decoder = nemo_asr.JasperDecoderForCTC.import_from_config( + model_config_file, "JasperDecoderForCTC", overwrite_params={"num_classes": len(vocab)} + ) - # Training model - audio, audio_len, transcript, transcript_len = data_layer() - processed, processed_len = data_preprocessor(input_signal=audio, length=audio_len) - encoded, encoded_len = jasper_encoder(audio_signal=processed, length=processed_len) - log_probs = jasper_decoder(encoder_output=encoded) - predictions = greedy_decoder(log_probs=log_probs) - loss = ctc_loss(log_probs=log_probs, targets=transcript, input_length=encoded_len, target_length=transcript_len,) + # Instantiate losses. + ctc_loss = nemo_asr.CTCLossNM(num_classes=len(vocab)) + greedy_decoder = nemo_asr.GreedyCTCDecoder() + + # Create a training graph. + audio, audio_len, transcript, transcript_len = data_layer() + processed, processed_len = data_preprocessor(input_signal=audio, length=audio_len) + encoded, encoded_len = jasper_encoder(audio_signal=processed, length=processed_len) + log_probs = jasper_decoder(encoder_output=encoded) + predictions = greedy_decoder(log_probs=log_probs) + loss = ctc_loss( + log_probs=log_probs, targets=transcript, input_length=encoded_len, target_length=transcript_len, + ) - # Evaluation model - audio_e, audio_len_e, transcript_e, transcript_len_e = data_layer_eval() - processed_e, processed_len_e = data_preprocessor(input_signal=audio_e, length=audio_len_e) - encoded_e, encoded_len_e = jasper_encoder(audio_signal=processed_e, length=processed_len_e) - log_probs_e = jasper_decoder(encoder_output=encoded_e) - predictions_e = greedy_decoder(log_probs=log_probs_e) - loss_e = ctc_loss( - log_probs=log_probs_e, targets=transcript_e, input_length=encoded_len_e, target_length=transcript_len_e, - ) + # Create an evaluation graph. 
+ audio_e, audio_len_e, transcript_e, transcript_len_e = data_layer_eval() + processed_e, processed_len_e = data_preprocessor(input_signal=audio_e, length=audio_len_e) + encoded_e, encoded_len_e = jasper_encoder(audio_signal=processed_e, length=processed_len_e) + log_probs_e = jasper_decoder(encoder_output=encoded_e) + predictions_e = greedy_decoder(log_probs=log_probs_e) + loss_e = ctc_loss( + log_probs=log_probs_e, targets=transcript_e, input_length=encoded_len_e, target_length=transcript_len_e, + ) logging.info("Num of params in encoder: {0}".format(jasper_encoder.num_weights)) - # Callbacks to print info to console and Tensorboard + # Callbacks to print info to console and Tensorboard. train_callback = nemo.core.SimpleLossLoggerCallback( tensors=[loss, predictions, transcript, transcript_len], print_func=partial(monitor_asr_train_progress, labels=vocab), @@ -105,17 +98,12 @@ def create_dags(jasper_params, args, nf): user_epochs_done_callback=process_evaluation_epoch, eval_step=args.eval_freq, tb_writer=nf.tb_writer, + eval_at_start=not args.do_not_eval_at_start, ) callbacks = [train_callback, checkpointer_callback, eval_callback] - return ( - loss, - eval_tensors, - callbacks, - total_steps, - vocab, - log_probs_e, - encoded_len_e, - ) + + # Return entities required by the actual training. + return (loss, eval_tensors, callbacks, total_steps, log_probs_e, encoded_len_e, g0) def main(): @@ -129,11 +117,13 @@ def main(): # Create new args # parser.add_argument("--lm", default="./an4-lm.3gram.binary", type=str) + parser.add_argument("--batch_size", default=48, type=int, help="size of the training batch") parser.add_argument("--lm", default=None, type=str) parser.add_argument("--test_after_training", action='store_true') parser.add_argument("--momentum", type=float) parser.add_argument("--beta1", default=0.95, type=float) parser.add_argument("--beta2", default=0.25, type=float) + parser.add_argument("--do_not_eval_at_start", action='store_true') parser.set_defaults( model_config="./configs/jasper_an4.yaml", train_dataset="~/TestData/an4_dataset/an4_train.json", @@ -141,8 +131,6 @@ def main(): work_dir="./tmp", optimizer="novograd", num_epochs=50, - batch_size=48, - eval_batch_size=64, lr=0.02, weight_decay=0.005, checkpoint_save_freq=1000, @@ -172,9 +160,11 @@ def main(): yaml = YAML(typ="safe") with open(args.model_config) as f: jasper_params = yaml.load(f) + # Get vocabulary. 
+ vocab = jasper_params['labels'] - (loss, eval_tensors, callbacks, total_steps, vocab, log_probs_e, encoded_len_e,) = create_dags( - jasper_params, args, nf + (loss, eval_tensors, callbacks, total_steps, log_probs_e, encoded_len_e, g0) = create_dags( + args.model_config, vocab, args, nf ) nf.train( @@ -200,7 +190,7 @@ def main(): logging.info("Testing greedy and beam search with LM WER.") # Create BeamSearch NM if nf.world_size > 1 or args.lm is None: - logging.warning("Skipping beam search WER as it does not " "work if doing distributed training.") + logging.warning("Skipping beam search WER as it does not work if doing distributed training.") else: beam_search_with_lm = nemo_asr.BeamSearchDecoderWithLM( vocab=vocab, beam_width=64, alpha=2.0, beta=1.5, lm_path=args.lm, num_cpus=max(os.cpu_count(), 1), @@ -239,13 +229,15 @@ def main(): folder=checkpoint_dir, step_freq=args.checkpoint_save_freq, force_load=True, ) - # Distributed Data Parallel changes the underlying class so we need - # to reinstantiate Encoder and Decoder args.num_epochs += 10 previous_step_count = total_steps - loss, eval_tensors, callbacks, total_steps, vocab, _, _ = create_dags(jasper_params, args, nf) + # Distributed Data Parallel and amp changes the underlying class so we need to reinstantiate modules + # Clear the module registry + nemo.utils.app_state.AppState().modules.clear() nf.reset_trainer() + loss, eval_tensors, callbacks, total_steps, _, _, new_g = create_dags(args.model_config, vocab, args, nf) + nf.train( tensors_to_optimize=[loss], callbacks=callbacks, diff --git a/examples/asr/notebooks/1_ASR_tutorial_using_NeMo.ipynb b/examples/asr/notebooks/1_ASR_tutorial_using_NeMo.ipynb index aa5341ae91b8..47202325adba 100644 --- a/examples/asr/notebooks/1_ASR_tutorial_using_NeMo.ipynb +++ b/examples/asr/notebooks/1_ASR_tutorial_using_NeMo.ipynb @@ -18,9 +18,7 @@ "# If you're using Google Colab and not running locally, run this cell.\n", "!pip install wget\n", "!apt-get install sox\n", - "!pip install git+https://github.com/NVIDIA/apex.git\n", - "!pip install nemo-toolkit\n", - "!pip install nemo-asr\n", + "!pip install nemo_toolkit[asr]==0.10.0b10\n", "!pip install unidecode\n", "\n", "!mkdir configs\n", @@ -362,9 +360,9 @@ "\n", "A Jasper model looks like roughly this:\n", "\n", - "![Jasper with CTC](https://raw.githubusercontent.com/NVIDIA/NeMo/master/docs/sources/source/asr/jasper.png)\n", + "![Jasper with CTC](https://raw.githubusercontent.com/NVIDIA/NeMo/master/docs/sources/source/asr/jasper_vertical.png)\n", "\n", - "#### Specifying Our Model with a YAML File\n", + "#### Specifying Our Model with a YAML Config File\n", "\n", "For this tutorial, we'll build a *Jasper_4x1 model*, with `K=4` blocks of single (`R=1`) sub-blocks and a *greedy CTC decoder*, using the configuration found in `./configs/jasper_an4.yaml`.\n", "\n", @@ -382,49 +380,22 @@ "Next, we have four entries that correspond to the `K=4` blocks, and each has `repeat: 1` since we are using `R=1`.\n", "These are followed by two more entries for the blocks that appear at the end of our Jasper model before the CTC loss.\n", "\n", - "There are also some entries at the top of the file that specify that we should be shuffling our training data but not our evaluation data (see `AudioToTextDataLayer`), and some specifications for preprocessing and converting the audio data (in `AudioToMelSpectrogramPreprocessor`).\n", + "There are also some entries at the top of the file that specify that we should be shuffling our training data but not our 
evaluation data (see `AudioToTextDataLayer_train` and `AudioToTextDataLayer_eval`), and some specifications for preprocessing and converting the audio data (in `AudioToMelSpectrogramPreprocessor`).\n", "\n", - "Using a YAML config such as this is helpful for getting a quick and human-readable overview of what your architecture looks like, and allows you to swap out model and run configurations easily.\n", + "Using a YAML config such as this is helpful for getting a quick and human-readable overview of what your architecture looks like, and allows you to swap out model and run configurations easily without needing to change your code.\n", "\n", "#### Building Training and Evaluation DAGs with NeMo\n", "\n", - "Building a model using NeMo consists of (1) instantiating the neural modules we need and (2) specifying the DAG by linking them together.\n", - "\n", - "Let's start by loading the config." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# --- Loading Config --- #\n", - "from ruamel.yaml import YAML\n", - "\n", - "# Parse config and pass to model building function\n", - "config_path = '../configs/jasper_an4.yaml'\n", - "yaml = YAML(typ='safe')\n", - "with open(config_path) as f:\n", - " params = yaml.load(f)\n", - " print(\"******\\nLoaded config file.\\n******\")\n", - "\n", - "labels = params['labels'] # Vocab of tokens\n", - "sample_rate = params['sample_rate']" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In NeMo, **the training and inference pipelines are managed by a `NeuralModuleFactory`**, which takes care of checkpointing, callbacks, and logs, along with other details in training and inference. We set its `log_dir` argument to specify where our model logs and outputs will be written, and can set other training and inference settings in its constructor. For instance, if we were **resuming training from a checkpoint**, we would set the argument `checkpoint_dir=`." + "Building a model using NeMo consists of (1) instantiating the neural modules we need and (2) specifying the DAG by linking them together." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Along with logs in Nemo, you can optionally view the tensorboard logs with ``create_tb_writer=True`` optional argument to ``NeuralModuleFactory``. By default all the tensorboard log files will be stored in {log_dir}/tensorboard directory but you can change this with tensorboard_dir argument. One can load tensorboard logs through tensorboard by running ``tensorboard --logdir=`` in terminal" + "In NeMo, **the training and inference pipelines are managed by a `NeuralModuleFactory`**, which takes care of checkpointing, callbacks, and logs, along with other details in training and inference. We set its `log_dir` argument to specify where our model logs and outputs will be written, and can set other training and inference settings in its constructor. For instance, if we were **resuming training from a checkpoint**, we would set the argument `checkpoint_dir=`.\n", + "\n", + "Along with logs in NeMo, you can optionally view the tensorboard logs with the `create_tb_writer=True` argument to the `NeuralModuleFactory`. By default all the tensorboard log files will be stored in `{log_dir}/tensorboard`, but you can change this with the `tensorboard_dir` argument. One can load tensorboard logs through tensorboard by running `tensorboard --logdir=` in the terminal." 
] }, { @@ -438,7 +409,7 @@ " log_dir=data_dir+'/an4_tutorial/',\n", " create_tb_writer=True)\n", "\n", - "logger = neural_factory.logger" + "logger = nemo.logging" ] }, { @@ -458,7 +429,7 @@ " )\n", "```\n", "\n", - "Now that we have the configurations and our neural module factory, we can specify our neural modules and instantiate them." + "Now that we have our neural module factory, we can **specify our neural modules and instantiate them**. Here, we load the parameters for each module from the configuration file using `import_from_config`. For the module parameters that we can't read directly from the config file or want to overwrite, we can use the `overwrite_params` argument." ] }, { @@ -467,33 +438,44 @@ "metadata": {}, "outputs": [], "source": [ + "# --- Config Information ---#\n", + "from ruamel.yaml import YAML\n", + "\n", + "config_path = '../configs/jasper_an4.yaml'\n", + "\n", + "yaml = YAML(typ='safe')\n", + "with open(config_path) as f:\n", + " params = yaml.load(f)\n", + "labels = params['labels'] # Vocab\n", + "\n", "# --- Instantiate Neural Modules --- #\n", "\n", "# Create training and test data layers (which load data) and data preprocessor\n", - "data_layer_train = nemo_asr.AudioToTextDataLayer(\n", - " manifest_filepath=train_manifest,\n", - " sample_rate=sample_rate,\n", - " labels=labels,\n", - " batch_size=32,\n", - " **params['AudioToTextDataLayer']['train']) # Training datalayer\n", - "\n", - "data_layer_test = nemo_asr.AudioToTextDataLayer(\n", - " manifest_filepath=test_manifest,\n", - " sample_rate=sample_rate,\n", - " labels=labels,\n", - " batch_size=32,\n", - " **params['AudioToTextDataLayer']['eval']) # Eval datalayer\n", - "\n", - "data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor(\n", - " sample_rate=sample_rate,\n", - " **params['AudioToMelSpectrogramPreprocessor'])\n", + "data_layer_train = nemo_asr.AudioToTextDataLayer.import_from_config(\n", + " config_path,\n", + " \"AudioToTextDataLayer_train\",\n", + " overwrite_params={\"manifest_filepath\": train_manifest}\n", + ") # Training datalayer\n", + "\n", + "data_layer_test = nemo_asr.AudioToTextDataLayer.import_from_config(\n", + " config_path,\n", + " \"AudioToTextDataLayer_eval\",\n", + " overwrite_params={\"manifest_filepath\": test_manifest}\n", + ") # Eval datalayer\n", + "\n", + "data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor.import_from_config(\n", + " config_path, \"AudioToMelSpectrogramPreprocessor\"\n", + ")\n", "\n", "# Create the Jasper_4x1 encoder as specified, and a CTC decoder\n", - "encoder = nemo_asr.JasperEncoder(**params['JasperEncoder'])\n", + "encoder = nemo_asr.JasperEncoder.import_from_config(\n", + " config_path, \"JasperEncoder\"\n", + ")\n", "\n", - "decoder = nemo_asr.JasperDecoderForCTC(\n", - " feat_in=params['JasperEncoder']['jasper'][-1]['filters'],\n", - " num_classes=len(labels))\n", + "decoder = nemo_asr.JasperDecoderForCTC.import_from_config(\n", + " config_path, \"JasperDecoderForCTC\",\n", + " overwrite_params={\"num_classes\": len(labels)}\n", + ")\n", "\n", "ctc_loss = nemo_asr.CTCLossNM(num_classes=len(labels))\n", "greedy_decoder = nemo_asr.GreedyCTCDecoder()" @@ -756,7 +738,9 @@ " input_length=encoded_len,\n", " target_length=transcript_len)\n", "\n", - "# And then you can train as usual." + "# And then you can train as usual.\n", + "# If you want to try it out in this notebook,\n", + "# be sure to run neural_factory.reset_trainer() right before training again." 
] }, { @@ -853,7 +837,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.6" + "version": "3.7.3" } }, "nbformat": 4, diff --git a/examples/asr/notebooks/2_Online_ASR_Microphone_Demo.ipynb b/examples/asr/notebooks/2_Online_ASR_Microphone_Demo.ipynb index 9352d5fb0102..170200f2d622 100644 --- a/examples/asr/notebooks/2_Online_ASR_Microphone_Demo.ipynb +++ b/examples/asr/notebooks/2_Online_ASR_Microphone_Demo.ipynb @@ -43,9 +43,9 @@ "outputs": [], "source": [ "# the checkpoints are available from NGC: https://ngc.nvidia.com/catalog/models/nvidia:quartznet15x5\n", - "MODEL_YAML = 'examples/asr/configs/quartznet15x5.yaml'\n", - "CHECKPOINT_ENCODER = './quartznet15x5/JasperEncoder-STEP-247400.pt'\n", - "CHECKPOINT_DECODER = './quartznet15x5/JasperDecoderForCTC-STEP-247400.pt'" + "MODEL_YAML = '../configs/quartznet15x5.yaml'\n", + "CHECKPOINT_ENCODER = '../../../quartznet15x5/JasperEncoder-STEP-247400.pt'\n", + "CHECKPOINT_DECODER = '../../../quartznet15x5/JasperDecoderForCTC-STEP-247400.pt'" ] }, { @@ -119,7 +119,7 @@ "outputs": [], "source": [ "from nemo.backends.pytorch.nm import DataLayerNM\n", - "from nemo.core.neural_types import NeuralType, BatchTag, TimeTag, AxisType\n", + "from nemo.core.neural_types import NeuralType, AudioSignal, LengthsType\n", "import torch\n", "\n", "# simple data layer to pass audio signal\n", @@ -127,14 +127,13 @@ " @property\n", " def output_ports(self):\n", " return {\n", - " \"audio_signal\": NeuralType({0: AxisType(BatchTag),\n", - " 1: AxisType(TimeTag)}),\n", - "\n", - " \"a_sig_length\": NeuralType({0: AxisType(BatchTag)}),\n", + " 'audio_signal': NeuralType(('B', 'T'), AudioSignal(freq=self._sample_rate)),\n", + " 'a_sig_length': NeuralType(tuple('B'), LengthsType()),\n", " }\n", "\n", - " def __init__(self):\n", + " def __init__(self, sample_rate):\n", " super().__init__()\n", + " self._sample_rate = sample_rate\n", " self.output = True\n", " \n", " def __iter__(self):\n", @@ -171,7 +170,7 @@ "outputs": [], "source": [ "# Instantiate necessary neural modules\n", - "data_layer = AudioDataLayer()\n", + "data_layer = AudioDataLayer(sample_rate=model_definition['sample_rate'])\n", "\n", "data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor(\n", " **model_definition['AudioToMelSpectrogramPreprocessor'])\n", diff --git a/examples/asr/notebooks/3_Speech_Commands_using_NeMo.ipynb b/examples/asr/notebooks/3_Speech_Commands_using_NeMo.ipynb new file mode 100644 index 000000000000..36a9834ee800 --- /dev/null +++ b/examples/asr/notebooks/3_Speech_Commands_using_NeMo.ipynb @@ -0,0 +1,1016 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "You can run either this notebook locally (if you have all the dependencies and a GPU) or on Google Colab.\n", + "\n", + "Instructions for setting up Colab are as follows:\n", + "1. Open a new Python 3 notebook.\n", + "2. Import this notebook from GitHub (File -> Upload Notebook -> \"GITHUB\" tab -> copy/paste GitHub URL)\n", + "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", + "4. 
Run this cell to set up dependencies.\n", + "\"\"\"\n", + "# If you're using Google Colab and not running locally, run this cell.\n", + "!pip install wget\n", + "!pip install git+https://github.com/NVIDIA/apex.git\n", + "!pip install nemo-toolkit\n", + "!pip install nemo-asr\n", + "!pip install unidecode\n", + "\n", + "!mkdir configs\n", + "!wget -P configs/ https://raw.githubusercontent.com/NVIDIA/NeMo/master/examples/asr/configs/quartznet_speech_commands_3x1_v1.yaml\n", + "!wget -P configs/ https://raw.githubusercontent.com/NVIDIA/NeMo/master/examples/asr/configs/quartznet_speech_commands_3x1_v2.yaml" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# Import some necessary libraries\n", + "import os\n", + "import argparse\n", + "import copy\n", + "import math\n", + "import os\n", + "import glob\n", + "from functools import partial\n", + "from datetime import datetime\n", + "from ruamel.yaml import YAML" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Introduction\n", + "\n", + "This Speech Command recognition tutorial is based on the QuartzNet model from the paper \"[QuartzNet: Deep Automatic Speech Recognition with 1D Time-Channel Separable Convolutions](https://arxiv.org/pdf/1910.10261.pdf)\" with a modified decoder head to suit classification tasks.\n", + "\n", + "The notebook will follow the steps below:\n", + "\n", + " - Dataset preparation: Preparing Google Speech Commands dataset\n", + "\n", + " - Audio preprocessing (feature extraction): signal normalization, windowing, (log) spectrogram (or mel scale spectrogram, or MFCC)\n", + "\n", + " - Data augmentation using SpecAugment \"[SpecAugment: A Simple Data Augmentation Method for Automatic Speech Recognition](https://arxiv.org/abs/1904.08779)\" to increase number of data samples.\n", + " \n", + " - Develop a small Neural classification model which can be trained efficiently.\n", + " \n", + " - Model training on the Google Speech Commands dataset in NeMo.\n", + " \n", + " - Evaluation of error cases of the model by audibly hearing the samples" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# This is where the Google Speech Commands directory will be placed.\n", + "# Change this if you don't want the data to be extracted in the current directory.\n", + "# Select the version of the dataset required as well (can be 1 or 2)\n", + "DATASET_VER = 1\n", + "data_dir = './google_dataset_v{0}/'.format(DATASET_VER)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Data Preparation\n", + "\n", + "We will be using the open source Google Speech Commands Dataset (we will use V1 of the dataset for the tutorial, but require very minor changes to support V2 dataset). These scripts below will download the dataset and convert it to a format suitable for use with nemo_asr" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Download the dataset\n", + "\n", + "The dataset must be prepared using the scripts provided under the `{NeMo root directory}/scripts` sub-directory. \n", + "\n", + "Run the following command below to download the training script and execute it.\n", + "\n", + "**NOTE**: You should have at least 4GB of disk space available if you’ve used --data_version=1; and at least 6GB if you used --data_version=2. 
Also, it will take some time to download and process, so go grab a coffee.\n", + "\n", + "**NOTE**: You may additionally pass a `--rebalance` flag at the end of the `process_speech_commands_data.py` script to rebalance the class samples in the manifest." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2020-02-27 16:14:08-- https://raw.githubusercontent.com/NVIDIA/NeMo/master/scripts/process_speech_commands_data.py\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.40.133\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.40.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 6872 (6.7K) [text/plain]\n", + "Saving to: ‘process_speech_commands_data.py.1’\n", + "\n", + "process_speech_comm 100%[===================>] 6.71K --.-KB/s in 0s \n", + "\n", + "2020-02-27 16:14:08 (58.8 MB/s) - ‘process_speech_commands_data.py.1’ saved [6872/6872]\n", + "\n" + ] + } + ], + "source": [ + "!wget https://raw.githubusercontent.com/NVIDIA/NeMo/master/scripts/process_speech_commands_data.py" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "mkdir: cannot create directory ‘./google_dataset_v1/’: File exists\n", + "Dataset ready !\n" + ] + } + ], + "source": [ + "!mkdir {data_dir}\n", + "!python process_speech_commands_data.py --data_root={data_dir} --data_version={DATASET_VER}\n", + "print(\"Dataset ready !\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prepare the path to manifest files" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "dtaset_path = 'google_speech_recognition_v{0}'.format(DATASET_VER)\n", + "dataset_basedir = os.path.join(data_dir, dtaset_path)\n", + "\n", + "train_dataset = os.path.join(dataset_basedir, 'train_manifest.json')\n", + "val_dataset = os.path.join(dataset_basedir, 'validation_manifest.json')\n", + "test_dataset = os.path.join(dataset_basedir, 'validation_manifest.json')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Read a few rows of the manifest file \n", + "\n", + "Manifest files are the data structure used by NeMo to declare a few important details about the data :\n", + "\n", + "1) `audio_filepath`: Refers to the path to the raw audio file
\n", + "2) `command`: The class label (or speech command) of this sample
\n", + "3) `duration`: The length of the audio file, in seconds." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "head: cannot open '{train_manifest}' for reading: No such file or directory\n" + ] + } + ], + "source": [ + "!head -n 5 {train_dataset}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Training - Preparation\n", + "\n", + "We will be training a QuartzNet model from the paper \"[QuartzNet: Deep Automatic Speech Recognition with 1D Time-Channel Separable Convolutions](https://arxiv.org/pdf/1910.10261.pdf)\". The benefit of QuartzNet over JASPER models is that they use Separable Convolutions, which greatly reduce the number of parameters required to get good model accuracy.\n", + "\n", + "QuartzNet models generally follow the model definition pattern QuartzNet-[BxR], where B is the number of blocks and R is the number of convolutional sub-blocks. Each sub-block contains a 1-D masked convolution, batch normalization, ReLU, and dropout:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# Lets load the config file for the QuartzNet 3x1 model\n", + "# Here we will be using separable convolutions\n", + "# with 3 blocks (k=3 repeated once r=1 from the picture above)\n", + "yaml = YAML(typ=\"safe\")\n", + "with open(\"configs/quartznet_speech_commands_3x1_v{0}.yaml\".format(DATASET_VER)) as f:\n", + " jasper_params = yaml.load(f)\n", + "\n", + "# Pre-define a set of labels that this model must learn to predict\n", + "labels = jasper_params['labels']\n", + "\n", + "# Get the sampling rate of the data\n", + "sample_rate = jasper_params['sample_rate']" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# Import NeMo core functionality\n", + "# NeMo's \"core\" package\n", + "import nemo\n", + "# NeMo's ASR collection\n", + "import nemo.collections.asr as nemo_asr\n", + "# NeMo's learning rate policy\n", + "from nemo.utils.lr_policies import CosineAnnealing\n", + "from nemo.collections.asr.helpers import (\n", + " monitor_classification_training_progress,\n", + " process_classification_evaluation_batch,\n", + " process_classification_evaluation_epoch,\n", + ")\n", + "from nemo.collections.asr.metrics import classification_accuracy\n", + "\n", + "from nemo.utils import logging" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define some model hyper parameters" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# Lets define some hyper parameters\n", + "lr = 0.05\n", + "num_epochs = 5\n", + "batch_size = 128\n", + "weight_decay = 0.001" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define the NeMo components" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[NeMo W 2020-02-27 16:14:20 deprecated:68] Function ``_get_trainer`` is deprecated. 
It is going to be removed in the future version.\n" + ] + } + ], + "source": [ + "# Create a Neural Factory\n", + "# It creates log files and tensorboard writers for us among other functions\n", + "neural_factory = nemo.core.NeuralModuleFactory(\n", + " log_dir='./{0}/quartznet-3x1-v{1}'.format(dataset_basedir, DATASET_VER),\n", + " create_tb_writer=True)\n", + "tb_writer = neural_factory.tb_writer" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[NeMo I 2020-02-27 16:14:21 collections:215] Filtered duration for loading collection is 0.000000.\n", + "[NeMo I 2020-02-27 16:14:21 collections:215] Filtered duration for loading collection is 0.000000.\n", + "[NeMo I 2020-02-27 16:14:21 features:144] PADDING: 16\n", + "[NeMo I 2020-02-27 16:14:21 features:152] STFT using conv\n", + "[NeMo I 2020-02-27 16:14:24 :34] Steps per epoch : 401\n", + "[NeMo I 2020-02-27 16:14:24 :35] Have 51088 examples to train on.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/smajumdar/anaconda3/envs/NeMo/lib/python3.7/site-packages/torch/nn/_reduction.py:43: UserWarning: size_average and reduce args will be deprecated, please use reduction='mean' instead.\n", + " warnings.warn(warning.format(ret))\n" + ] + } + ], + "source": [ + "# Check if data augmentation such as white noise and time shift augmentation should be used\n", + "audio_augmentor = jasper_params.get('AudioAugmentor', None)\n", + "\n", + "# Build the input data layer and the preprocessing layers for the train set\n", + "train_data_layer = nemo_asr.AudioToSpeechLabelDataLayer(\n", + " manifest_filepath=train_dataset,\n", + " labels=labels,\n", + " sample_rate=sample_rate,\n", + " batch_size=batch_size,\n", + " num_workers=os.cpu_count(),\n", + " augmentor=audio_augmentor,\n", + " shuffle=True\n", + ")\n", + "\n", + " # Build the input data layer and the preprocessing layers for the test set\n", + "eval_data_layer = nemo_asr.AudioToSpeechLabelDataLayer(\n", + " manifest_filepath=test_dataset,\n", + " sample_rate=sample_rate,\n", + " labels=labels,\n", + " batch_size=batch_size,\n", + " num_workers=os.cpu_count(),\n", + " shuffle=False,\n", + ")\n", + "\n", + "# We will convert the raw audio data into MelSpectrogram Features to feed as input to our model\n", + "data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor(\n", + " sample_rate=sample_rate, **jasper_params[\"AudioToMelSpectrogramPreprocessor\"],\n", + ")\n", + "\n", + "# Compute the total number of samples and the number of training steps per epoch\n", + "N = len(train_data_layer)\n", + "steps_per_epoch = math.ceil(N / float(batch_size) + 1)\n", + "\n", + "logging.info(\"Steps per epoch : {0}\".format(steps_per_epoch))\n", + "logging.info('Have {0} examples to train on.'.format(N))\n", + "\n", + "# Here we begin defining all of the augmentations we want\n", + "# We will pad the preprocessed spectrogram image to have a certain number of timesteps\n", + "# This centers the generated spectrogram and adds black boundaries to either side\n", + "# of the padded image.\n", + "crop_pad_augmentation = nemo_asr.CropOrPadSpectrogramAugmentation(audio_length=128)\n", + "\n", + "# We also optionally add `SpecAugment` augmentations based on the config file\n", + "# SpecAugment has various possible augmentations to the generated spectrogram\n", + "# 1) Frequency band masking\n", + "# 2) Time band masking\n", + "# 3) Rectangular cutout\n", + 
"spectr_augment_config = jasper_params.get('SpectrogramAugmentation', None)\n", + "if spectr_augment_config:\n", + " data_spectr_augmentation = nemo_asr.SpectrogramAugmentation(**spectr_augment_config)\n", + "\n", + "# Build the QuartzNet Encoder model\n", + "# The config defines the layers as a list of dictionaries\n", + "# The first and last two blocks are not considered when we say QuartzNet-[BxR]\n", + "# B is counted as the number of blocks after the first layer and before the penultimate layer.\n", + "# R is defined as the number of repetitions of each block in B.\n", + "# Note: We can scale the convolution kernels size by the float parameter `kernel_size_factor`\n", + "jasper_encoder = nemo_asr.JasperEncoder(**jasper_params[\"JasperEncoder\"])\n", + "\n", + "# We then define the QuartzNet decoder.\n", + "# This decoder head is specialized for the task for classification, such that it\n", + "# accepts a set of `N-feat` per timestep of the model, and averages these features\n", + "# over all the timesteps, before passing a Linear classification layer on those features.\n", + "jasper_decoder = nemo_asr.JasperDecoderForClassification(\n", + " feat_in=jasper_params[\"JasperEncoder\"][\"jasper\"][-1][\"filters\"],\n", + " num_classes=len(labels),\n", + " **jasper_params['JasperDecoderForClassification'],\n", + ")\n", + "\n", + "# We can easily apply cross entropy loss to train this model\n", + "ce_loss = nemo_asr.CrossEntropyLossNM()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[NeMo I 2020-02-27 16:14:24 :2] ================================\n", + "[NeMo I 2020-02-27 16:14:24 :3] Number of parameters in encoder: 73344\n", + "[NeMo I 2020-02-27 16:14:24 :4] Number of parameters in decoder: 3870\n", + "[NeMo I 2020-02-27 16:14:24 :6] Total number of parameters in model: 77214\n", + "[NeMo I 2020-02-27 16:14:24 :8] ================================\n" + ] + } + ], + "source": [ + "# Lets print out the number of parameters of this model\n", + "logging.info('================================')\n", + "logging.info(f\"Number of parameters in encoder: {jasper_encoder.num_weights}\")\n", + "logging.info(f\"Number of parameters in decoder: {jasper_decoder.num_weights}\")\n", + "logging.info(\n", + " f\"Total number of parameters in model: \" f\"{jasper_decoder.num_weights + jasper_encoder.num_weights}\"\n", + ")\n", + "logging.info('================================')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Compile the Training Graph for NeMo" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "# Now we have all of the components that are required to build the NeMo execution graph!\n", + "## Build the training data loaders and preprocessors first\n", + "audio_signal, audio_signal_len, commands, command_len = train_data_layer()\n", + "processed_signal, processed_signal_len = data_preprocessor(input_signal=audio_signal, length=audio_signal_len)\n", + "processed_signal, processed_signal_len = crop_pad_augmentation(\n", + " input_signal=processed_signal,\n", + " length=audio_signal_len\n", + ")\n", + "\n", + "## Augment the dataset for training\n", + "if spectr_augment_config:\n", + " processed_signal = data_spectr_augmentation(input_spec=processed_signal)\n", + "\n", + "## Define the model\n", + "encoded, encoded_len = jasper_encoder(audio_signal=processed_signal, 
length=processed_signal_len)\n", + "decoded = jasper_decoder(encoder_output=encoded)\n", + "\n", + "## Obtain the train loss\n", + "train_loss = ce_loss(logits=decoded, labels=commands)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Compile the Test Graph for NeMo" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "# Now we build the test graph in a similar way, reusing the above components\n", + "## Build the test data loader and preprocess same way as train graph\n", + "## But note, we do not add the spectrogram augmentation to the test graph !\n", + "test_audio_signal, test_audio_signal_len, test_commands, test_command_len = eval_data_layer()\n", + "test_processed_signal, test_processed_signal_len = data_preprocessor(\n", + " input_signal=test_audio_signal, length=test_audio_signal_len\n", + ")\n", + "test_processed_signal, test_processed_signal_len = crop_pad_augmentation(\n", + " input_signal=test_processed_signal, length=test_processed_signal_len\n", + ")\n", + "\n", + "# Pass the test data through the model encoder and decoder\n", + "test_encoded, test_encoded_len = jasper_encoder(\n", + " audio_signal=test_processed_signal, length=test_processed_signal_len\n", + ")\n", + "test_decoded = jasper_decoder(encoder_output=test_encoded)\n", + "\n", + "# Compute test loss for visualization\n", + "test_loss = ce_loss(logits=test_decoded, labels=test_commands)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setting up callbacks for training and test set evaluation, and checkpoint saving" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "# Now that we have our training and evaluation graphs built,\n", + "# we can focus on a few callbacks to help us save the model checkpoints\n", + "# during training, as well as display train and test metrics\n", + "\n", + "# Callbacks needed to print train info to console and Tensorboard\n", + "train_callback = nemo.core.SimpleLossLoggerCallback(\n", + " # Notice that we pass in loss, predictions, and the labels.\n", + " # Of course we would like to see our training loss, but we need the\n", + " # other arguments to calculate the accuracy.\n", + " tensors=[train_loss, decoded, commands],\n", + " # The print_func defines what gets printed.\n", + " print_func=partial(monitor_classification_training_progress, eval_metric=None),\n", + " get_tb_values=lambda x: [(\"loss\", x[0])],\n", + " tb_writer=neural_factory.tb_writer,\n", + ")\n", + "\n", + "# Callbacks needed to print test info to console and Tensorboard\n", + "tagname = 'TestSet'\n", + "eval_callback = nemo.core.EvaluatorCallback(\n", + " eval_tensors=[test_loss, test_decoded, test_commands],\n", + " user_iter_callback=partial(process_classification_evaluation_batch, top_k=1),\n", + " user_epochs_done_callback=partial(process_classification_evaluation_epoch, eval_metric=1, tag=tagname),\n", + " eval_step=200, # How often we evaluate the model on the test set\n", + " tb_writer=neural_factory.tb_writer,\n", + ")\n", + "\n", + "# Callback to save model checkpoints\n", + "chpt_callback = nemo.core.CheckpointCallback(\n", + " folder=neural_factory.checkpoint_dir,\n", + " step_freq=1000,\n", + ")\n", + "\n", + "# Prepare a list of checkpoints to pass to the engine\n", + "callbacks = [train_callback, eval_callback, chpt_callback]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Training the model\n", 
+ "\n", + "Even with such a small model (77k parameters), and just 5 epochs (should take just a few minutes to train), you should be able to get a test set accuracy score in the range 85 - 90%. Not bad for a 30 (v1) or 35 (v2) way classification problem !\n", + "\n", + "Experiment with increasing the number of epochs or with batch size to see how much you can improve the score!" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[NeMo I 2020-02-27 16:14:24 :11] Using `` Learning Rate Scheduler\n", + "[NeMo I 2020-02-27 16:14:24 callbacks:179] Starting .....\n", + "[NeMo I 2020-02-27 16:14:24 callbacks:343] Found 2 modules with weights:\n", + "[NeMo I 2020-02-27 16:14:24 callbacks:345] JasperEncoder\n", + "[NeMo I 2020-02-27 16:14:24 callbacks:345] JasperDecoderForClassification\n", + "[NeMo I 2020-02-27 16:14:24 callbacks:346] Total model parameters: 77214\n", + "[NeMo I 2020-02-27 16:14:24 callbacks:301] Restoring checkpoint from folder ././google_dataset_v1/google_speech_recognition_v1/quartznet-3x1-v1/checkpoints ...\n", + "[NeMo I 2020-02-27 16:14:24 callbacks:186] Done in 0.025618553161621094\n", + "[NeMo I 2020-02-27 16:14:24 callbacks:432] Final Evaluation ..............................\n", + "[NeMo I 2020-02-27 16:14:27 helpers:273] ==========>>>>>>Evaluation Loss TestSet: 0.35130253434181213\n", + "[NeMo I 2020-02-27 16:14:27 helpers:275] ==========>>>>>>Evaluation Accuracy Top@1 TestSet: 89.5999\n", + "[NeMo I 2020-02-27 16:14:27 callbacks:437] Evaluation time: 2.7541420459747314 seconds\n", + "[NeMo I 2020-02-27 16:14:27 callbacks:293] Saved checkpoint: ././google_dataset_v1/google_speech_recognition_v1/quartznet-3x1-v1/checkpoints/trainer-STEP-2000.pt\n" + ] + } + ], + "source": [ + "# Now we have all the components required to train the model\n", + "# Lets define a learning rate schedule\n", + "\n", + "# Define a learning rate schedule\n", + "lr_policy = CosineAnnealing(\n", + " total_steps=num_epochs * steps_per_epoch,\n", + " warmup_ratio=0.05,\n", + " min_lr=0.001,\n", + ")\n", + "\n", + "logging.info(f\"Using `{lr_policy}` Learning Rate Scheduler\")\n", + "\n", + "# Finally, lets train this model !\n", + "neural_factory.train(\n", + " tensors_to_optimize=[train_loss],\n", + " callbacks=callbacks,\n", + " lr_policy=lr_policy,\n", + " optimizer=\"novograd\",\n", + " optimization_params={\n", + " \"num_epochs\": num_epochs,\n", + " \"max_steps\": None,\n", + " \"lr\": lr,\n", + " \"momentum\": 0.95,\n", + " \"betas\": (0.98, 0.5),\n", + " \"weight_decay\": weight_decay,\n", + " \"grad_norm_clip\": None,\n", + " },\n", + " batches_per_step=1,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Evaluation of incorrectly predicted samples\n", + "\n", + "Given that we have a trained model, which performs reasonably well, lets try to listen to the samples where the model is least confident in its predictions.\n", + "\n", + "For this, we need support of the librosa library.\n", + "\n", + "**NOTE**: The following code depends on librosa. 
To install it, run the following code block first" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: librosa in /home/smajumdar/anaconda3/envs/NeMo/lib/python3.7/site-packages (0.7.2)\n", + "Requirement already satisfied: decorator>=3.0.0 in /home/smajumdar/anaconda3/envs/NeMo/lib/python3.7/site-packages (from librosa) (4.4.1)\n", + "Requirement already satisfied: resampy>=0.2.2 in /home/smajumdar/anaconda3/envs/NeMo/lib/python3.7/site-packages (from librosa) (0.2.2)\n", + "Requirement already satisfied: joblib>=0.12 in /home/smajumdar/anaconda3/envs/NeMo/lib/python3.7/site-packages (from librosa) (0.14.1)\n", + "Requirement already satisfied: scipy>=1.0.0 in /home/smajumdar/anaconda3/envs/NeMo/lib/python3.7/site-packages (from librosa) (1.4.1)\n", + "Requirement already satisfied: numba>=0.43.0 in /home/smajumdar/anaconda3/envs/NeMo/lib/python3.7/site-packages (from librosa) (0.47.0)\n", + "Requirement already satisfied: scikit-learn!=0.19.0,>=0.14.0 in /home/smajumdar/anaconda3/envs/NeMo/lib/python3.7/site-packages (from librosa) (0.22.1)\n", + "Requirement already satisfied: six>=1.3 in /home/smajumdar/anaconda3/envs/NeMo/lib/python3.7/site-packages (from librosa) (1.14.0)\n", + "Requirement already satisfied: numpy>=1.15.0 in /home/smajumdar/anaconda3/envs/NeMo/lib/python3.7/site-packages (from librosa) (1.18.1)\n", + "Requirement already satisfied: audioread>=2.0.0 in /home/smajumdar/anaconda3/envs/NeMo/lib/python3.7/site-packages (from librosa) (2.1.8)\n", + "Requirement already satisfied: soundfile>=0.9.0 in /home/smajumdar/anaconda3/envs/NeMo/lib/python3.7/site-packages (from librosa) (0.10.3.post1)\n", + "Requirement already satisfied: setuptools in /home/smajumdar/anaconda3/envs/NeMo/lib/python3.7/site-packages (from numba>=0.43.0->librosa) (44.0.0.post20200106)\n", + "Requirement already satisfied: llvmlite>=0.31.0dev0 in /home/smajumdar/anaconda3/envs/NeMo/lib/python3.7/site-packages (from numba>=0.43.0->librosa) (0.31.0)\n", + "Requirement already satisfied: cffi>=1.0 in /home/smajumdar/anaconda3/envs/NeMo/lib/python3.7/site-packages (from soundfile>=0.9.0->librosa) (1.13.2)\n", + "Requirement already satisfied: pycparser in /home/smajumdar/anaconda3/envs/NeMo/lib/python3.7/site-packages (from cffi>=1.0->soundfile>=0.9.0->librosa) (2.19)\n" + ] + } + ], + "source": [ + "!pip install librosa" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "# lets add a path to the checkpoint dir\n", + "model_path = neural_factory.checkpoint_dir" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Extract the predictions from the model\n", + "\n", + "We want to possess the actual logits of the model instead of just the final evaluation score, so we use `NeuralFactory.infer(...)` to extract the logits per batch of samples provided." 
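+ "\n",
+ "As the following cells use it, `infer` returns one list per requested tensor, with one entry per evaluated batch. A minimal sketch of how the result is indexed (the shapes are what the accuracy computation below assumes, not a full description of the API):\n",
+ "\n",
+ "```python\n",
+ "# evaluated_tensors = [per-batch losses, per-batch logits, per-batch labels]\n",
+ "first_batch_logits = evaluated_tensors[1][0]  # shape: [batch_size, num_classes]\n",
+ "first_batch_labels = evaluated_tensors[2][0]  # shape: [batch_size]\n",
+ "```"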
+ ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[NeMo I 2020-02-27 16:14:28 actions:1453] Restoring JasperEncoder from ././google_dataset_v1/google_speech_recognition_v1/quartznet-3x1-v1/checkpoints/JasperEncoder-STEP-2000.pt\n", + "[NeMo I 2020-02-27 16:14:28 actions:1453] Restoring JasperDecoderForClassification from ././google_dataset_v1/google_speech_recognition_v1/quartznet-3x1-v1/checkpoints/JasperDecoderForClassification-STEP-2000.pt\n", + "[NeMo I 2020-02-27 16:14:29 actions:726] Evaluating batch 0 out of 54\n", + "[NeMo I 2020-02-27 16:14:29 actions:726] Evaluating batch 5 out of 54\n", + "[NeMo I 2020-02-27 16:14:29 actions:726] Evaluating batch 10 out of 54\n", + "[NeMo I 2020-02-27 16:14:29 actions:726] Evaluating batch 15 out of 54\n", + "[NeMo I 2020-02-27 16:14:30 actions:726] Evaluating batch 20 out of 54\n", + "[NeMo I 2020-02-27 16:14:30 actions:726] Evaluating batch 25 out of 54\n", + "[NeMo I 2020-02-27 16:14:30 actions:726] Evaluating batch 30 out of 54\n", + "[NeMo I 2020-02-27 16:14:30 actions:726] Evaluating batch 35 out of 54\n", + "[NeMo I 2020-02-27 16:14:30 actions:726] Evaluating batch 40 out of 54\n", + "[NeMo I 2020-02-27 16:14:31 actions:726] Evaluating batch 45 out of 54\n", + "[NeMo I 2020-02-27 16:14:31 actions:726] Evaluating batch 50 out of 54\n" + ] + } + ], + "source": [ + "# --- Inference Only --- #\n", + "# We've already built the inference DAG above, so all we need is to call infer().\n", + "evaluated_tensors = neural_factory.infer(\n", + " # These are the tensors we want to get from the model.\n", + " tensors=[test_loss, test_decoded, test_commands],\n", + " # checkpoint_dir specifies where the model params are loaded from.\n", + " checkpoint_dir=model_path\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Accuracy calculation" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[NeMo I 2020-02-27 16:14:31 :19] Total correct / Total count : 6094 / 6798\n", + "[NeMo I 2020-02-27 16:14:31 :20] Final accuracy : 0.8964401294498382\n" + ] + } + ], + "source": [ + "correct_count = 0\n", + "total_count = 0\n", + "\n", + "for batch_idx, (logits, labels) in enumerate(zip(evaluated_tensors[1], evaluated_tensors[2])):\n", + " acc = classification_accuracy(\n", + " logits=logits,\n", + " targets=labels,\n", + " top_k=[1]\n", + " )\n", + "\n", + " # Select top 1 accuracy only\n", + " acc = acc[0]\n", + "\n", + " # Since accuracy here is \"per batch\", we simply denormalize it by multiplying\n", + " # by batch size to recover the count of correct samples.\n", + " correct_count += int(acc * logits.size(0))\n", + " total_count += logits.size(0)\n", + "\n", + "logging.info(f\"Total correct / Total count : {correct_count} / {total_count}\")\n", + "logging.info(f\"Final accuracy : {correct_count / float(total_count)}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Filtering out incorrect samples\n", + "Let us now filter out the incorrectly labeled samples from the total set of samples in the test set" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import librosa\n", + "import json\n", + "import IPython.display as ipd" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": 
{}, + "outputs": [], + "source": [ + "# First lets create a utility class to remap the integer class labels to actual string label\n", + "class ReverseMapLabel:\n", + " def __init__(self, data_layer: nemo_asr.AudioToSpeechLabelDataLayer):\n", + " self.label2id = dict(data_layer._dataset.label2id)\n", + " self.id2label = dict(data_layer._dataset.id2label)\n", + "\n", + " def __call__(self, pred_idx, label_idx):\n", + " return self.id2label[pred_idx], self.id2label[label_idx]" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[NeMo I 2020-02-27 16:14:31 :22] Num test samples : 6798\n", + "[NeMo I 2020-02-27 16:14:31 :23] Num errors : 704\n" + ] + } + ], + "source": [ + "# Next, lets get the indices of all the incorrectly labeled samples\n", + "sample_idx = 0\n", + "incorrect_preds = []\n", + "rev_map = ReverseMapLabel(eval_data_layer)\n", + "\n", + "# Remember, evaluated_tensor = (loss, logits, labels)\n", + "for batch_idx, (logits, labels) in enumerate(zip(evaluated_tensors[1], evaluated_tensors[2])):\n", + " probs = torch.softmax(logits, dim=-1)\n", + " probas, preds = torch.max(probs, dim=-1)\n", + "\n", + " incorrect_ids = (preds != labels).nonzero()\n", + " for idx in incorrect_ids:\n", + " proba = float(probas[idx][0])\n", + " pred = int(preds[idx][0])\n", + " label = int(labels[idx][0])\n", + " idx = int(idx[0]) + sample_idx\n", + "\n", + " incorrect_preds.append((idx, *rev_map(pred, label), proba))\n", + "\n", + " sample_idx += labels.size(0)\n", + "\n", + "logging.info(f\"Num test samples : {total_count}\")\n", + "logging.info(f\"Num errors : {len(incorrect_preds)}\")\n", + "\n", + "# First lets sort by confidence of prediction\n", + "incorrect_preds = sorted(incorrect_preds, key=lambda x: x[-1], reverse=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Examine a subset of incorrect samples\n", + "Lets print out the (test id, predicted label, ground truth label, confidence) tuple of first 20 incorrectly labeled samples" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[NeMo I 2020-02-27 16:14:31 :2] (3184, 'up', 'two', 0.13125509023666382)\n", + "[NeMo I 2020-02-27 16:14:31 :2] (1966, 'wow', 'no', 0.13236339390277863)\n", + "[NeMo I 2020-02-27 16:14:31 :2] (1415, 'up', 'yes', 0.13250434398651123)\n", + "[NeMo I 2020-02-27 16:14:31 :2] (5428, 'nine', 'up', 0.13804833590984344)\n", + "[NeMo I 2020-02-27 16:14:31 :2] (1837, 'up', 'zero', 0.1411990523338318)\n", + "[NeMo I 2020-02-27 16:14:31 :2] (3083, 'four', 'two', 0.14131611585617065)\n", + "[NeMo I 2020-02-27 16:14:31 :2] (885, 'one', 'eight', 0.143906369805336)\n", + "[NeMo I 2020-02-27 16:14:31 :2] (5584, 'go', 'cat', 0.14928434789180756)\n", + "[NeMo I 2020-02-27 16:14:31 :2] (6056, 'dog', 'sheila', 0.1584177315235138)\n", + "[NeMo I 2020-02-27 16:14:31 :2] (5238, 'up', 'on', 0.15925118327140808)\n", + "[NeMo I 2020-02-27 16:14:31 :2] (3401, 'up', 'go', 0.16477465629577637)\n", + "[NeMo I 2020-02-27 16:14:31 :2] (4700, 'zero', 'off', 0.16728630661964417)\n", + "[NeMo I 2020-02-27 16:14:31 :2] (5175, 'up', 'on', 0.1677844226360321)\n", + "[NeMo I 2020-02-27 16:14:31 :2] (1353, 'three', 'yes', 0.17652447521686554)\n", + "[NeMo I 2020-02-27 16:14:31 :2] (6284, 'nine', 'left', 0.18027563393115997)\n", + "[NeMo I 2020-02-27 16:14:31 :2] (6123, 'nine', 'left', 
0.18055355548858643)\n", + "[NeMo I 2020-02-27 16:14:31 :2] (5793, 'two', 'stop', 0.18353451788425446)\n", + "[NeMo I 2020-02-27 16:14:31 :2] (1136, 'one', 'nine', 0.18678408861160278)\n", + "[NeMo I 2020-02-27 16:14:31 :2] (5042, 'bird', 'on', 0.1876409649848938)\n", + "[NeMo I 2020-02-27 16:14:31 :2] (807, 'up', 'eight', 0.19111238420009613)\n" + ] + } + ], + "source": [ + "for incorrect_sample in incorrect_preds[:20]:\n", + " logging.info(str(incorrect_sample))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define a threshold below which we designate a model's prediction as \"low confidence\"" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[NeMo I 2020-02-27 16:14:31 :4] Number of low confidence predictions : 39\n" + ] + } + ], + "source": [ + "# Filter out how many such samples exist\n", + "low_confidence_threshold = 0.25\n", + "count_low_confidence = len(list(filter(lambda x: x[-1] <= low_confidence_threshold, incorrect_preds)))\n", + "logging.info(f\"Number of low confidence predictions : {count_low_confidence}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Lets hear the samples which the model has least confidence in !" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "# First lets create a helper function to parse the manifest files\n", + "def parse_manifest(manifest):\n", + " data = []\n", + " for line in manifest:\n", + " line = json.loads(line)\n", + " data.append(line)\n", + "\n", + " return data" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "# Next, lets create a helper function to actually listen to certain samples\n", + "def listen_to_file(sample_id, pred=None, label=None, proba=None):\n", + " # Load the audio waveform using librosa\n", + " filepath = test_samples[sample_id]['audio_filepath']\n", + " audio, sample_rate = librosa.load(filepath)\n", + "\n", + " if pred is not None and label is not None and proba is not None:\n", + " logging.info(f\"Sample : {sample_id} Prediction : {pred} Label : {label} Confidence = {proba: 0.4f}\")\n", + " else:\n", + " logging.info(f\"Sample : {sample_id}\")\n", + "\n", + " return ipd.Audio(audio, rate=sample_rate)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "# Now lets load the test manifest into memory\n", + "test_samples = []\n", + "with open(test_dataset, 'r') as test_f:\n", + " test_samples = test_f.readlines()\n", + "\n", + "test_samples = parse_manifest(test_samples)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Finally, lets listen to all the audio samples where the model made a mistake\n", + "# Note: This list of incorrect samples may be quite large, so you may choose to subsample `incorrect_preds`\n", + "for sample_id, pred, label, proba in incorrect_preds[:count_low_confidence]:\n", + " ipd.display(listen_to_file(sample_id, pred=pred, label=label, proba=proba))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + 
"file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/examples/asr/notebooks/4_Online_Data_Augmentation.ipynb b/examples/asr/notebooks/4_Online_Data_Augmentation.ipynb new file mode 100644 index 000000000000..edbfa3e271b8 --- /dev/null +++ b/examples/asr/notebooks/4_Online_Data_Augmentation.ipynb @@ -0,0 +1,1024 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "You can run either this notebook locally (if you have all the dependencies and a GPU) or on Google Colab.\n", + "\n", + "Instructions for setting up Colab are as follows:\n", + "1. Open a new Python 3 notebook.\n", + "2. Import this notebook from GitHub (File -> Upload Notebook -> \"GITHUB\" tab -> copy/paste GitHub URL)\n", + "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", + "4. Run this cell to set up dependencies.\n", + "\"\"\"\n", + "# If you're using Google Colab and not running locally, run this cell.\n", + "!pip install wget\n", + "!pip install git+https://github.com/NVIDIA/apex.git\n", + "!pip install nemo_toolkit[asr]\n", + "!pip install unidecode" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!mkdir configs\n", + "!wget -P configs/ https://raw.githubusercontent.com/NVIDIA/NeMo/master/examples/asr/configs/quartznet_speech_commands_3x1_v1.yaml" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Import some necessary libraries\n", + "import os\n", + "import random\n", + "import argparse\n", + "import copy\n", + "import math\n", + "import os\n", + "import glob\n", + "from functools import partial\n", + "from datetime import datetime\n", + "from ruamel.yaml import YAML" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Introduction\n", + "\n", + "Data augmentation is a useful method to improve the performance of models which is applicable across multiple domains. Certain augmentations can also substantially improve robustness of models to noisy samples. \n", + "\n", + "In this notebook, we describe how to construct an augmentation pipeline inside [Neural Modules (NeMo)](https://github.com/NVIDIA/NeMo), enable augmented training of a [MatchboxNet model](https://arxiv.org/abs/2004.08531) (based on QuartzNet from the paper [\"QuartzNet: Deep Automatic Speech Recognition with 1D Time-Channel Separable Convolutions\"](https://arxiv.org/abs/1910.10261)) and finally how to construct custom augmentations to add to NeMo.\n", + "\n", + "The notebook will follow the steps below:\n", + "\n", + " - Dataset preparation: Preparing a noise dataset using an example file.\n", + "\n", + " - Construct a data augmentation pipeline.\n", + " \n", + " - Construct a custom augmentation and register it for use in NeMo." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Note\n", + "Data augmentation is valuable for many datasets, but it comes at the cost of increased training time if samples are augmented during training time. Certain augmentations are particularly costly, in terms of how much time they take to process a single sample. 
A few examples of slow augmentations available in NeMo are : \n", + "\n", + " - Speed Perturbation\n", + " - Time Stretch Perturbation (Sample level)\n", + " - Noise Perturbation\n", + " - Impulse Perturbation\n", + " - Time Stretch Augmentation (Batch level, Neural Module)\n", + " \n", + "For such augmentations, it is advisable to pre-process the dataset offline for a one time preprocessing cost and then train the dataset on this augmented training set." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Taking a Look at Our Data (AN4)\n", + "\n", + "The AN4 dataset, also known as the Alphanumeric dataset, was collected and published by Carnegie Mellon University. It consists of recordings of people spelling out addresses, names, telephone numbers, etc., one letter or number at a time, as well as their corresponding transcripts. We choose to use AN4 for this tutorial because it is relatively small, with 948 training and 130 test utterances, and so it trains quickly.\n", + "\n", + "Before we get started, let's download and prepare the dataset. The utterances are available as `.sph` files, so we will need to convert them to `.wav` for later processing. Please make sure you have [Sox](http://sox.sourceforge.net/) installed for this step (see the \"Downloads\" section of the main page)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# This is where the an4/ directory will be placed.\n", + "# Change this if you don't want the data to be extracted in the current directory.\n", + "data_dir = '.'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import glob\n", + "import os\n", + "import subprocess\n", + "import tarfile\n", + "import wget\n", + "\n", + "# Download the dataset. This will take a few moments...\n", + "print(\"******\")\n", + "if not os.path.exists(data_dir + '/an4_sphere.tar.gz'):\n", + " an4_url = 'http://www.speech.cs.cmu.edu/databases/an4/an4_sphere.tar.gz'\n", + " an4_path = wget.download(an4_url, data_dir)\n", + " print(f\"Dataset downloaded at: {an4_path}\")\n", + "else:\n", + " print(\"Tarfile already exists.\")\n", + " an4_path = data_dir + '/an4_sphere.tar.gz'\n", + "\n", + "# Untar and convert .sph to .wav (using sox)\n", + "tar = tarfile.open(an4_path)\n", + "tar.extractall(path=data_dir)\n", + "\n", + "print(\"Converting .sph to .wav...\")\n", + "sph_list = glob.glob(data_dir + '/an4/**/*.sph', recursive=True)\n", + "for sph_path in sph_list:\n", + " wav_path = sph_path[:-4] + '.wav'\n", + " cmd = [\"sox\", sph_path, wav_path]\n", + " subprocess.run(cmd)\n", + "print(\"Finished conversion.\\n******\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prepare the path to manifest files" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dataset_basedir = os.path.join(data_dir, 'an4')\n", + "\n", + "train_dataset = os.path.join(dataset_basedir, 'train_manifest.json')\n", + "test_dataset = os.path.join(dataset_basedir, 'test_manifest.json')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Read a few rows of the manifest file \n", + "\n", + "Manifest files are the data structure used by NeMo to declare a few important details about the data :\n", + "\n", + "1) `audio_filepath`: Refers to the path to the raw audio file
\n", + "2) `text`: The text transcript of this sample
\n", + "3) `duration`: The length of the audio file, in seconds." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!head -n 5 {train_dataset}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Data Augmentation Pipeline\n", + "\n", + "Constructing a data augmentation pipeline in NeMo is as simple as composing a nested dictionary that describes two things :\n", + "\n", + "1) The probability of that augmentation occuring - using the `prob` keyword
\n", + "2) The keyword arguments required by that augmentation class\n", + "\n", + "Below, we show a few samples of these augmentations. Note, in order to distinguish between the original sample and the perturbed sample, we exaggerate the perturbation strength significantly." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import librosa\n", + "import json\n", + "import IPython.display as ipd" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Audio file preparation " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Import the data augmentation component from ASR collection\n", + "from nemo.collections.asr.parts import perturb, segment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Lets see the available perturbations\n", + "perturb.perturbation_types" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Obtain a baseline audio file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "filepath = librosa.util.example_audio_file()\n", + "sample, sr = librosa.core.load(filepath)\n", + "\n", + "ipd.Audio(sample, rate=sr)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Convert to WAV format" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import soundfile as sf\n", + "\n", + "# lets convert this ogg file into a wave to be compatible with NeMo\n", + "if not os.path.exists('./media'):\n", + " os.makedirs('./media/')\n", + " \n", + "filename = 'Kevin_MacLeod_-_Vibe_Ace.wav'\n", + "filepath = os.path.join('media', filename)\n", + "\n", + "sf.write(filepath, sample, samplerate=sr)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sample, sr = librosa.core.load(filepath)\n", + "ipd.Audio(sample, rate=sr)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# NeMo has its own support class for loading wav files\n", + "def load_audio() -> segment.AudioSegment:\n", + " filename = 'Kevin_MacLeod_-_Vibe_Ace.wav'\n", + " filepath = os.path.join('media', filename)\n", + " sample_segment = segment.AudioSegment.from_file(filepath, target_sr=sr)\n", + " return sample_segment\n", + "\n", + "sample_segment = load_audio()\n", + "ipd.Audio(sample_segment.samples, rate=sr)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## White Noise Perturbation\n", + "\n", + "White Noise perturbation is performed by the following steps :
\n", + "1) Randomly sample the amplitude of the noise from a uniformly distributed range (defined in dB)
\n", + "2) Sample gaussian noise (mean = 0, std = 1) with same length as audio signal
\n", + "3) Scale this gaussian noise by the amplitude (in dB scale)
\n", + "4) Add this noise vector to the original sample\n", + "\n", + "Notably, the original signal should not have a \"hissing sound\" constantly present in the perturbed version." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "white_noise = perturb.WhiteNoisePerturbation(min_level=-50, max_level=-30)\n", + "\n", + "# Perturb the audio file\n", + "sample_segment = load_audio()\n", + "white_noise.perturb(sample_segment)\n", + "\n", + "ipd.Audio(sample_segment.samples, rate=sr)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Gain Perturbation\n", + "\n", + "Gain perturbation is performed by the following steps :
\n", + "1) Randomly sample the gain factor of the signal from a uniformly distributed range (defined in dB)
\n", + "2) Scale this original signal by the gain factor (in dB scale)
\n", + "\n", + "Notably, the tone of the original audio should sound slightly different as compared to the gain perturbed sample." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "gain = perturb.GainPerturbation(min_gain_dbfs=25, max_gain_dbfs=50)\n", + "\n", + "# Perturb the audio file \n", + "sample_segment = load_audio()\n", + "gain.perturb(sample_segment)\n", + "\n", + "ipd.Audio(sample_segment.samples, rate=sr)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Shift Perturbation\n", + "\n", + "Shift perturbation is performed by the following steps :
\n", + "1) Randomly sample the shift factor of the signal from a uniformly distributed range (defined in milliseconds)
\n", + "2) Depending on the sign of the shift, we shift the original signal to the left or the right.
\n", + "3) The boundary locations are filled with zeros after the shift of the signal
\n", + "\n", + "Notably, the perturbed signal below skips the first 25 to 50 seconds of the original audio below, and the remainder of the time is simply silence. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "shift = perturb.ShiftPerturbation(min_shift_ms=25000.0, max_shift_ms=50000.0)\n", + "\n", + "# Perturb the audio file \n", + "sample_segment = load_audio()\n", + "shift.perturb(sample_segment)\n", + "\n", + "ipd.Audio(sample_segment.samples, rate=sr)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data Dependent Perturbations\n", + "\n", + "Some perturbations require an external data source in order to perturb the original sample. Noise Perturbation is a perfect example of one such augmentation that requires an external noise source dataset in order to pertur the original data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Lets prepare a manifest file using the baseline file itself, cut into 1 second segments\n", + "\n", + "def write_manifest(filepath, data_dir='./media/', manifest_name='noise_manifest', duration_max=None, duration_stride=1.0, filter_long=False, duration_limit=10.0):\n", + " if duration_max is None:\n", + " duration_max = 1e9\n", + " \n", + " with open(os.path.join(data_dir, manifest_name + '.json'), 'w') as fout:\n", + " \n", + " try:\n", + " x, _sr = librosa.load(filepath)\n", + " duration = librosa.get_duration(x, sr=_sr)\n", + "\n", + " except Exception:\n", + " print(f\"\\n>>>>>>>>> WARNING: Librosa failed to load file {filepath}. Skipping this file !\\n\")\n", + " return\n", + "\n", + " if filter_long and duration > duration_limit:\n", + " print(f\"Skipping sound sample {filepath}, exceeds duration limit of {duration_limit}\")\n", + " return\n", + "\n", + " offsets = []\n", + " durations = []\n", + "\n", + " if duration > duration_max:\n", + " current_offset = 0.0\n", + "\n", + " while current_offset < duration:\n", + " difference = duration - current_offset\n", + " segment_duration = min(duration_max, difference)\n", + "\n", + " offsets.append(current_offset)\n", + " durations.append(segment_duration)\n", + "\n", + " current_offset += duration_stride\n", + "\n", + " else:\n", + " offsets.append(0.0)\n", + " durations.append(duration)\n", + "\n", + "\n", + " for duration, offset in zip(durations, offsets):\n", + " metadata = {\n", + " 'audio_filepath': filepath,\n", + " 'duration': duration,\n", + " 'label': 'noise',\n", + " 'text': '_', # for compatibility with ASRAudioText collection\n", + " 'offset': offset,\n", + " }\n", + "\n", + " json.dump(metadata, fout)\n", + " fout.write('\\n')\n", + " fout.flush()\n", + "\n", + " print(f\"Wrote {len(durations)} segments for filename {filename}\")\n", + " \n", + " print(\"Finished preparing manifest !\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "filename = 'Kevin_MacLeod_-_Vibe_Ace.wav'\n", + "filepath = os.path.join('media', filename)\n", + "\n", + "# Write a \"noise\" manifest file\n", + "write_manifest(filepath, manifest_name='noise_1s', duration_max=1.0, duration_stride=1.0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Lets read this noise manifest file\n", + "noise_manifest_path = os.path.join('media', 'noise_1s.json')\n", + "\n", + "!head -n 5 {noise_manifest_path}" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Lets create a helper method to load the first file in the train dataset of AN4\n", + "# Load the first sample in the manifest\n", + "def load_gsc_sample() -> segment.AudioSegment:\n", + " with open(train_dataset, 'r') as f:\n", + " line = f.readline()\n", + " \n", + " line = json.loads(line)\n", + " gsc_filepath = line['audio_filepath']\n", + " sample_segment = segment.AudioSegment.from_file(gsc_filepath)\n", + " return sample_segment\n", + "\n", + "gsc_sample_segment = load_gsc_sample()\n", + "ipd.Audio(gsc_sample_segment.samples, rate=16000)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Noise Augmentation\n", + "\n", + "Noise perturbation is performed by the following steps :
\n", + "1) Randomly sample the amplitude scale of the noise sample from a uniformly distributed range (defined in dB)
\n", + "2) Randomly choose an audio clip from the set of noise audio samples available
\n", + "3) Compute the gain (in dB) required for the noise clip as compared to the original sample and scale the noise by this factor
\n", + "4) If the noise snippet is of shorter duration than the original audio, then randomly select an index in time from the original sample, where the noise snippet will be added
\n", + "5) If instead the noise snippet is longer than the duration of the original audio, then randomly subsegment the noise snippet and add the full snippet to the original audio
\n", + "\n", + "Notably, the noise perturbed sample should sound as if there are two sounds playing at the same time (overlapping audio) as compared to the original signal. The magnitude of the noise will be dependent on step (3) and the location where the noise is added will depend on steps (4) and (5)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "rng = random.Random(0)\n", + "noise = perturb.NoisePerturbation(manifest_path=noise_manifest_path,\n", + " min_snr_db=-10, max_snr_db=-10,\n", + " max_gain_db=300.0, rng=rng)\n", + "\n", + "# Perturb the audio file \n", + "sample_segment = load_gsc_sample()\n", + "noise.perturb(sample_segment)\n", + "\n", + "ipd.Audio(sample_segment.samples, rate=16000)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Speed Perturbation\n", + "\n", + "Speed perturbation changes the speed of the speech, but does not preserve pitch of the sound. Try a few random augmentations to see how the pitch changes with change in duration of the audio file.\n", + "\n", + "**Note**: This is a very slow augmentation and is not advised to perform online augmentation for large datasets as it can dramatically increase training time." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "resample_type = 'kaiser_best' # Can be ['kaiser_best', 'kaiser_fast', 'fft', 'scipy']\n", + "speed = perturb.SpeedPerturbation(sr, resample_type, min_speed_rate=0.5, max_speed_rate=2.0, num_rates=-1)\n", + "\n", + "# Perturb the audio file \n", + "sample_segment = load_gsc_sample()\n", + "speed.perturb(sample_segment)\n", + "\n", + "ipd.Audio(sample_segment.samples, rate=16000)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Time Stretch Perturbation\n", + "\n", + "Time Stretch perturbation changes the speed of the speech, and also preserve pitch of the sound. \n", + "Try a few random augmentations to see how the pitch remains close to the same with change in duration of the audio file." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Note about speed optimizations\n", + "\n", + "Time stretch is a costly augmentation, and can easily cause training time to increase drastically. 
It is suggested that one installs the `numba` library using conda to use a more optimized augmentation kernel.\n", + "\n", + "```python\n", + "conda install numba\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "time_stretch = perturb.TimeStretchPerturbation(min_speed_rate=0.5, max_speed_rate=2.0, num_rates=-1)\n", + "\n", + "# Perturb the audio file \n", + "sample_segment = load_gsc_sample()\n", + "time_stretch.perturb(sample_segment)\n", + "\n", + "ipd.Audio(sample_segment.samples, rate=16000)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Augmentation Pipeline\n", + "\n", + "The augmentation pipeline can be constructed in multiple ways, either explicitly by instantiating the objects of these perturbations or implicitly by providing the arguments to these augmentations as a nested dictionary.\n", + "\n", + "We will show both approaches in the following sections" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Explicit definition" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Import the data augmentation component from ASR collection\n", + "from nemo.collections.asr.parts import perturb, segment" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Instantiate the perturbations" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "perturbations = [\n", + " perturb.WhiteNoisePerturbation(min_level=-90, max_level=-46),\n", + " perturb.GainPerturbation(min_gain_dbfs=0, max_gain_dbfs=50),\n", + " perturb.NoisePerturbation(manifest_path=noise_manifest_path,\n", + " min_snr_db=0, max_snr_db=50, max_gain_db=300.0)\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Select chance of perturbations being applied" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "probas = [1.0, 1.0, 0.5]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Prepare the audio augmentation object" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "augmentations = list(zip(probas, perturbations))\n", + "\n", + "audio_augmentations = perturb.AudioAugmentor(augmentations)\n", + "audio_augmentations._pipeline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Implicit definition" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "perturb.perturbation_types # Available perturbations" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Prepare the nested dictionary" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "audio_augmentations = dict(\n", + " white_noise = dict(\n", + " prob=1.0,\n", + " min_level=-90,\n", + " max_level=-46\n", + " ),\n", + " gain = dict(\n", + " prob=1.0,\n", + " min_gain_dbfs=0,\n", + " max_gain_dbfs=50\n", + " ),\n", + " noise = dict(\n", + " prob=0.5,\n", + " manifest_path=noise_manifest_path,\n", + " min_snr_db=0,\n", + " max_snr_db=50,\n", + " max_gain_db=300.0\n", + " )\n", + ")\n", + "\n", + "audio_augmentations" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Supply `audio_augmentations` as an 
argument to AudioToTextDataLayer or AudioToSpeechLabelDataLayer\n",
+ "\n",
+ "Both of these data layers accept an optional keyword argument `augmentor`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from nemo.collections import asr as nemo_asr"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "?nemo_asr.AudioToTextDataLayer"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "?nemo_asr.AudioToSpeechLabelDataLayer"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Training - Application of augmentations\n",
+ "\n",
+ "We will describe the data loaders for a MatchboxNet model from the paper \"[MatchboxNet: 1D Time-Channel Separable Convolutional Neural Network Architecture for Speech Commands Recognition](https://arxiv.org/abs/2004.08531)\". The benefit of MatchboxNet over Jasper models is that it uses separable convolutions, which greatly reduce the number of parameters required to reach good model accuracy.\n",
+ "\n",
+ "Care must be taken not to apply augmentations to the test set.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Let's load the config file for the QuartzNet 3x1 model\n",
+ "# Here we will be using separable convolutions\n",
+ "# with 3 blocks (k=3 repeated once r=1 from the picture above)\n",
+ "yaml = YAML(typ=\"safe\")\n",
+ "with open(\"configs/quartznet_speech_commands_3x1_v1.yaml\") as f:\n",
+ " jasper_params = yaml.load(f)\n",
+ "\n",
+ "# Pre-define a set of labels that this model must learn to predict\n",
+ "labels = jasper_params['labels']\n",
+ "\n",
+ "# Get the sampling rate of the data\n",
+ "sample_rate = jasper_params['sample_rate']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Import NeMo core functionality\n",
+ "# NeMo's \"core\" package\n",
+ "import nemo\n",
+ "# NeMo's ASR collection\n",
+ "import nemo.collections.asr as nemo_asr\n",
+ "# NeMo's learning rate policy\n",
+ "from nemo.utils.lr_policies import CosineAnnealing\n",
+ "from nemo.collections.asr.helpers import (\n",
+ " monitor_classification_training_progress,\n",
+ " process_classification_evaluation_batch,\n",
+ " process_classification_evaluation_epoch,\n",
+ ")\n",
+ "from nemo.collections.asr.metrics import classification_accuracy\n",
+ "\n",
+ "from nemo.utils import logging"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Define the NeMo components"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Create a Neural Factory\n",
+ "# It creates log files and TensorBoard writers for us, among other functions\n",
+ "neural_factory = nemo.core.NeuralModuleFactory(\n",
+ " log_dir='./{0}/quartznet-3x1-v1'.format(dataset_basedir),\n",
+ " create_tb_writer=True)\n",
+ "tb_writer = neural_factory.tb_writer"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Provide the augmentations to the training AudioToSpeechLabelDataLayer or AudioToTextDataLayer\n",
+ "\n",
+ "That's it! Now your training samples will be augmented during training!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Build the input data layer and the preprocessing layers for the train set\n",
+ "train_data_layer = nemo_asr.AudioToTextDataLayer(\n",
+ " manifest_filepath=train_dataset,\n",
+ " labels=labels,\n",
+ " sample_rate=sample_rate,\n",
+ " batch_size=32,\n",
+ " num_workers=os.cpu_count(),\n",
+ " shuffle=True,\n",
+ " augmentor=audio_augmentations, # Add your augmentations (implicit or explicit)\n",
+ ")\n",
+ "\n",
+ "# Build the input data layer and the preprocessing layers for the test set\n",
+ "eval_data_layer = nemo_asr.AudioToTextDataLayer(\n",
+ " manifest_filepath=test_dataset,\n",
+ " sample_rate=sample_rate,\n",
+ " labels=labels,\n",
+ " batch_size=32,\n",
+ " num_workers=os.cpu_count(),\n",
+ " shuffle=False,\n",
+ " augmentor=None # Make sure not to add augmentations to the test set!\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Custom Perturbations\n",
+ "\n",
+ "We can define and use custom perturbations as required, simply by extending the `Perturbation` class.\n",
+ "\n",
+ "Let's look at how we can build a custom noise perturbation that we can use to evaluate the effect of noise at inference time, in order to analyse the model's robustness to noise.\n",
+ "\n",
+ "In evaluation mode, we want to set an explicit value for the `snr_db` parameter instead of uniformly sampling it from a range. This allows us to control the signal-to-noise ratio without relying on randomness from the training implementation of `NoisePerturbation`.\n",
+ "\n",
+ "Further, we force a random seed in order to produce reproducible results on the evaluation set.\n",
+ "\n",
+ "With this combination, we can easily evaluate each sample in the test set `S` times (`S` being the number of random seeds), and can evaluate each of these samples at `D` levels of signal-to-noise ratio (in dB)."
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# We use a NeMo utility to parse the manifest file for us\n", + "from nemo.collections.asr.parts import collections, parsers\n", + "\n", + "class NoisePerturbationEval(perturb.Perturbation):\n", + " def __init__(\n", + " self, manifest_path=None, snr_db=40, max_gain_db=300.0, seed=None,\n", + " ):\n", + " seed = seed if seed is not None else 0\n", + " self._manifest = collections.ASRAudioText(manifest_path, parser=parsers.make_parser([]))\n", + " self._snr_db = snr_db\n", + " self._max_gain_db = max_gain_db\n", + " self._rng = random.Random(seed)\n", + " \n", + " # This is mostly obtained from the original NoisePerturbation class itself\n", + " def perturb(self, data):\n", + " snr_db = self._snr_db\n", + " noise_record = self._rng.sample(self._manifest.data, 1)[0]\n", + " noise = AudioSegment.from_file(noise_record.audio_file, target_sr=data.sample_rate)\n", + " noise_gain_db = min(data.rms_db - noise.rms_db - snr_db, self._max_gain_db)\n", + "\n", + " # calculate noise segment to use\n", + " start_time = 0.0\n", + " if noise.duration > (start_time + data.duration):\n", + " noise.subsegment(start_time=start_time, end_time=start_time + data.duration)\n", + "\n", + " # adjust gain for snr purposes and superimpose\n", + " noise.gain_db(noise_gain_db)\n", + "\n", + " if noise._samples.shape[0] < data._samples.shape[0]:\n", + " noise_idx = data._samples.shape[0] // 2 # midpoint of audio\n", + " while (noise_idx + noise._samples.shape[0]) > data._samples.shape[0]:\n", + " noise_idx = noise_idx // 2 # half the initial starting point\n", + "\n", + " data._samples[noise_idx: noise_idx + noise._samples.shape[0]] += noise._samples\n", + "\n", + " else:\n", + " data._samples += noise._samples\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Registering augmentations\n", + "\n", + "We can use either approach to submit this test time augmentation to the Data Loaders.\n", + "\n", + "In order to obtain the convenience of the implicit method, we must register this augmentation into NeMo's directory of available augmentations. This can be done as follows -" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "perturb.register_perturbation(name='noise_eval', perturbation=NoisePerturbationEval)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Lets check the registry of allowed perturbations !\n", + "perturb.perturbation_types" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Note\n", + "\n", + "It is not allowed to overwrite already registered perturbations using the `perturb.register_perturbation` method. 
It will raise a `ValueError` in order to prevent overwriting the pre-existing perturbation types" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.7.6 64-bit ('NeMo': conda)", + "language": "python", + "name": "python37664bitnemoconda43f94a748a2e4953b0129556ecdf4f62" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/examples/asr/notebooks/5_Online_Speech_Commands_Microphone_Demo.ipynb b/examples/asr/notebooks/5_Online_Speech_Commands_Microphone_Demo.ipynb new file mode 100644 index 000000000000..2ed61fb942d8 --- /dev/null +++ b/examples/asr/notebooks/5_Online_Speech_Commands_Microphone_Demo.ipynb @@ -0,0 +1,436 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook demonstrates speech command recognition from a microphone's stream in NeMo.\n", + "\n", + "It is **not a recommended** way to do inference in production workflows. If you are interested in \n", + "production-level inference using NeMo ASR models, please sign-up to Jarvis early access program: https://developer.nvidia.com/nvidia-jarvis" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The notebook requires PyAudio library to get a signal from an audio device.\n", + "For Ubuntu, please run the following commands to install it:\n", + "```\n", + "sudo apt-get install -y portaudio19-dev\n", + "pip install pyaudio\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import nemo\n", + "import nemo.collections.asr as nemo_asr\n", + "import numpy as np\n", + "import pyaudio as pa\n", + "import time" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Model Architecture and Weights\n", + "\n", + "The model architecture is defined in a YAML file available in the config directory. MatchboxNet 3x1x64 has been trained on the Google Speech Commands dataset (v2) version, and these weights are available on NGC. They will automatically be downloaded if not found." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# the checkpoints are available from NGC: https://ngc.nvidia.com/catalog/models/nvidia:google_speech_commands_v2___matchboxnet_3x1x1\n", + "MODEL_YAML = '../configs/quartznet_speech_commands_3x1_v2.yaml'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Download the checkpoint files\n", + "base_checkpoint_path = './checkpoints/matchboxnet_v2-3x1x64/'\n", + "CHECKPOINT_ENCODER = os.path.join(base_checkpoint_path, 'JasperEncoder-STEP-89000.pt')\n", + "CHECKPOINT_DECODER = os.path.join(base_checkpoint_path, 'JasperDecoderForClassification-STEP-89000.pt')\n", + "\n", + "if not os.path.exists(base_checkpoint_path):\n", + " os.makedirs(base_checkpoint_path)\n", + " \n", + "if not os.path.exists(CHECKPOINT_ENCODER):\n", + " !wget https://api.ngc.nvidia.com/v2/models/nvidia/google_speech_commands_v2___matchboxnet_3x1x1/versions/1/files/JasperEncoder-STEP-89000.pt -P {base_checkpoint_path};\n", + "\n", + "if not os.path.exists(CHECKPOINT_DECODER):\n", + " !wget https://api.ngc.nvidia.com/v2/models/nvidia/google_speech_commands_v2___matchboxnet_3x1x1/versions/1/files/JasperDecoderForClassification-STEP-89000.pt -P {base_checkpoint_path};" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Construct the Neural Modules and the eval graph" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from ruamel.yaml import YAML\n", + "yaml = YAML(typ=\"safe\")\n", + "with open(MODEL_YAML) as f:\n", + " model_definition = yaml.load(f)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "neural_factory = nemo.core.NeuralModuleFactory(\n", + " placement=nemo.core.DeviceType.GPU,\n", + " backend=nemo.core.Backend.PyTorch)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define a Neural Module to iterate over audio\n", + "\n", + "Here we define a custom Neural Module which acts as an iterator over a stream of audio that is supplied to it. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from nemo.backends.pytorch.nm import DataLayerNM\n", + "from nemo.core.neural_types import NeuralType, AudioSignal, LengthsType\n", + "import torch\n", + "\n", + "# simple data layer to pass audio signal\n", + "class AudioDataLayer(DataLayerNM):\n", + " @property\n", + " def output_ports(self):\n", + " return {\n", + " 'audio_signal': NeuralType(('B', 'T'), AudioSignal(freq=self._sample_rate)),\n", + " 'a_sig_length': NeuralType(tuple('B'), LengthsType()),\n", + " }\n", + "\n", + " def __init__(self, sample_rate):\n", + " super().__init__()\n", + " self._sample_rate = sample_rate\n", + " self.output = True\n", + " \n", + " def __iter__(self):\n", + " return self\n", + " \n", + " def __next__(self):\n", + " if not self.output:\n", + " raise StopIteration\n", + " self.output = False\n", + " return torch.as_tensor(self.signal, dtype=torch.float32), \\\n", + " torch.as_tensor(self.signal_shape, dtype=torch.int64)\n", + " \n", + " def set_signal(self, signal):\n", + " self.signal = np.reshape(signal.astype(np.float32)/32768., [1, -1])\n", + " self.signal_shape = np.expand_dims(self.signal.size, 0).astype(np.int64)\n", + " self.output = True\n", + "\n", + " def __len__(self):\n", + " return 1\n", + "\n", + " @property\n", + " def dataset(self):\n", + " return None\n", + "\n", + " @property\n", + " def data_iterator(self):\n", + " return self" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Instantiate the Neural Modules\n", + "\n", + "We now instantiate the neural modules and the encoder and decoder, set the weights of these models with the downloaded pretrained weights and construct the DAG to evaluate MatchboxNet on audio streams" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Instantiate necessary neural modules\n", + "data_layer = AudioDataLayer(sample_rate=model_definition['sample_rate'])\n", + "\n", + "data_preprocessor = nemo_asr.AudioToMFCCPreprocessor(\n", + " **model_definition['AudioToMFCCPreprocessor'])\n", + "\n", + "jasper_encoder = nemo_asr.JasperEncoder(\n", + " **model_definition['JasperEncoder'])\n", + "\n", + "jasper_decoder = nemo_asr.JasperDecoderForClassification(\n", + " feat_in=model_definition['JasperEncoder']['jasper'][-1]['filters'],\n", + " num_classes=len(model_definition['labels']))\n", + "\n", + "# load pre-trained model\n", + "jasper_encoder.restore_from(CHECKPOINT_ENCODER)\n", + "jasper_decoder.restore_from(CHECKPOINT_DECODER)\n", + "\n", + "# Define inference DAG\n", + "audio_signal, audio_signal_len = data_layer()\n", + "processed_signal, processed_signal_len = data_preprocessor(\n", + " input_signal=audio_signal,\n", + " length=audio_signal_len)\n", + "encoded, encoded_len = jasper_encoder(audio_signal=processed_signal,\n", + " length=processed_signal_len)\n", + "log_probs = jasper_decoder(encoder_output=encoded)\n", + "\n", + "# inference method for audio signal (single instance)\n", + "def infer_signal(self, signal):\n", + " data_layer.set_signal(signal)\n", + " tensors = self.infer([log_probs], verbose=False)\n", + " logits = tensors[0][0]\n", + " return logits\n", + "\n", + "neural_factory.infer_signal = infer_signal.__get__(neural_factory)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# FrameASR: Helper class for streaming inference" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, 
+ "outputs": [], + "source": [ + "# class for streaming frame-based ASR\n", + "# 1) use reset() method to reset FrameASR's state\n", + "# 2) call transcribe(frame) to do ASR on\n", + "# contiguous signal's frames\n", + "class FrameASR:\n", + " \n", + " def __init__(self, neural_factory, model_definition,\n", + " frame_len=2, frame_overlap=2.5, \n", + " offset=10):\n", + " '''\n", + " Args:\n", + " frame_len: frame's duration, seconds\n", + " frame_overlap: duration of overlaps before and after current frame, seconds\n", + " offset: number of symbols to drop for smooth streaming\n", + " '''\n", + " self.vocab = list(model_definition['labels'])\n", + " self.vocab.append('_')\n", + " \n", + " self.sr = model_definition['sample_rate']\n", + " self.frame_len = frame_len\n", + " self.n_frame_len = int(frame_len * self.sr)\n", + " self.frame_overlap = frame_overlap\n", + " self.n_frame_overlap = int(frame_overlap * self.sr)\n", + " timestep_duration = model_definition['AudioToMFCCPreprocessor']['window_stride']\n", + " for block in model_definition['JasperEncoder']['jasper']:\n", + " timestep_duration *= block['stride'][0] ** block['repeat']\n", + " self.buffer = np.zeros(shape=2*self.n_frame_overlap + self.n_frame_len,\n", + " dtype=np.float32)\n", + " self.offset = offset\n", + " self.reset()\n", + " \n", + " def _decode(self, frame, offset=0):\n", + " assert len(frame)==self.n_frame_len\n", + " self.buffer[:-self.n_frame_len] = self.buffer[self.n_frame_len:]\n", + " self.buffer[-self.n_frame_len:] = frame\n", + " logits = neural_factory.infer_signal(self.buffer).to('cpu').numpy()[0]\n", + " decoded = self._greedy_decoder(\n", + " logits, \n", + " self.vocab\n", + " )\n", + " return decoded[:len(decoded)-offset]\n", + " \n", + " def transcribe(self, frame=None):\n", + " if frame is None:\n", + " frame = np.zeros(shape=self.n_frame_len, dtype=np.float32)\n", + " if len(frame) < self.n_frame_len:\n", + " frame = np.pad(frame, [0, self.n_frame_len - len(frame)], 'constant')\n", + " unmerged = self._decode(frame, self.offset)\n", + " \n", + " return unmerged\n", + " \n", + " def reset(self):\n", + " '''\n", + " Reset frame_history and decoder's state\n", + " '''\n", + " self.buffer=np.zeros(shape=self.buffer.shape, dtype=np.float32)\n", + " self.prev_char = ''\n", + "\n", + " @staticmethod\n", + " def _greedy_decoder(logits, vocab):\n", + " s = ''\n", + " \n", + " if logits.shape[0]:\n", + " s += str(vocab[np.argmax(logits)]) + \"\\n\"\n", + " \n", + " return s" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# duration of signal frame, seconds\n", + "FRAME_LEN = 0.25\n", + "# number of audio channels (expect mono signal)\n", + "CHANNELS = 1\n", + "# sample rate, Hz\n", + "RATE = 16000\n", + "\n", + "CHUNK_SIZE = int(FRAME_LEN*RATE)\n", + "asr = FrameASR(neural_factory, model_definition,\n", + " frame_len=FRAME_LEN, frame_overlap=2.0, \n", + " offset=0)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# What classes can this model recognize?\n", + "\n", + "Before we begin inference on the actual audio stream, lets look at what are the classes this model was trained to recognize" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "labels = model_definition['labels']\n", + "\n", + "for i in range(7):\n", + " for j in range(5):\n", + " print('%-10s' % (labels[i * 5 + j]), end=' ')\n", + " print()" + ] + }, + { + "cell_type": "markdown", + 
"metadata": {}, + "source": [ + "# Begin listening to audio stream and perform inference using FrameASR" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "p = pa.PyAudio()\n", + "print('Available audio input devices:')\n", + "for i in range(p.get_device_count()):\n", + " dev = p.get_device_info_by_index(i)\n", + " if dev.get('maxInputChannels'):\n", + " print(i, dev.get('name'))\n", + "print('Please type input device ID:')\n", + "dev_idx = int(input())\n", + "\n", + "empty_counter = 0\n", + "\n", + "def callback(in_data, frame_count, time_info, status):\n", + " global empty_counter\n", + " signal = np.frombuffer(in_data, dtype=np.int16)\n", + " text = asr.transcribe(signal)\n", + " if len(text):\n", + " print(text,end='')\n", + " empty_counter = 3\n", + " elif empty_counter > 0:\n", + " empty_counter -= 1\n", + " if empty_counter == 0:\n", + " print(' ',end='')\n", + " return (in_data, pa.paContinue)\n", + "\n", + "stream = p.open(format=pa.paInt16,\n", + " channels=CHANNELS,\n", + " rate=RATE,\n", + " input=True,\n", + " input_device_index=dev_idx,\n", + " stream_callback=callback,\n", + " frames_per_buffer=CHUNK_SIZE)\n", + "\n", + "print('Listening...')\n", + "\n", + "stream.start_stream()\n", + "\n", + "while stream.is_active():\n", + " time.sleep(0.1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "stream.stop_stream()\n", + "stream.close()\n", + "p.terminate()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/examples/asr/notebooks/README.md b/examples/asr/notebooks/README.md index 9519f21fc546..2cfa65bb68cf 100644 --- a/examples/asr/notebooks/README.md +++ b/examples/asr/notebooks/README.md @@ -10,10 +10,16 @@ We recommend that you start with that if you are either new to ASR, or new to Ne You should be able to import the notebook from Google Colab by using the "Upload from GitHub" option. - 2. [Online Automatic Speech Recognition from a Microphone](./2_Online_ASR_Microphone_Demo.ipynb) The notebook demonstrates automatic speech recognition (ASR) from a microphone's stream in NeMo. It is **not a recommended** way to do inference in production workflows. If you are interested in a production-level inference using NeMo ASR models, please sign-up to [Jarvis early access program](https://developer.nvidia.com/nvidia-jarvis) +3. [Speech Commands in NeMo](./3_Speech_Commands_using_NeMo.ipynb) + +This tutorial builds upon the introduction to ASR and covers the basics of speech command detection using the Google Speech Commands dataset. +Inspite of the small size of the model, advanced augmentation schemes such as SpecAugment can deliver high performance models. + +We further analyse classification errors made by the model, and listen to samples which are predicted with least confidence by the trained model. +This exercise can be valuable when attempting to diagnose issues with the model or inspecting the dataset for inaccurate labelling. 
diff --git a/examples/asr/jasper.py b/examples/asr/other/jasper.py similarity index 94% rename from examples/asr/jasper.py rename to examples/asr/other/jasper.py index 2e276ed64f23..10b4d5d47f5e 100644 --- a/examples/asr/jasper.py +++ b/examples/asr/other/jasper.py @@ -11,6 +11,7 @@ import nemo.collections.asr as nemo_asr import nemo.utils.argparse as nm_argparse from nemo.collections.asr.helpers import monitor_asr_train_progress, process_evaluation_batch, process_evaluation_epoch +from nemo.utils import logging from nemo.utils.lr_policies import CosineAnnealing @@ -94,7 +95,7 @@ def create_all_dags(args, neural_factory): N = len(data_layer) steps_per_epoch = math.ceil(N / (args.batch_size * args.iter_per_step * args.num_gpus)) - nemo.logging.info('Have {0} examples to train on.'.format(N)) + logging.info('Have {0} examples to train on.'.format(N)) data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor( sample_rate=sample_rate, **jasper_params["AudioToMelSpectrogramPreprocessor"], @@ -127,7 +128,7 @@ def create_all_dags(args, neural_factory): data_layers_eval.append(data_layer_eval) else: - nemo.logging.warning("There were no val datasets passed") + logging.warning("There were no val datasets passed") jasper_encoder = nemo_asr.JasperEncoder( feat_in=jasper_params["AudioToMelSpectrogramPreprocessor"]["features"], **jasper_params["JasperEncoder"], @@ -141,13 +142,11 @@ def create_all_dags(args, neural_factory): greedy_decoder = nemo_asr.GreedyCTCDecoder() - nemo.logging.info('================================') - nemo.logging.info(f"Number of parameters in encoder: {jasper_encoder.num_weights}") - nemo.logging.info(f"Number of parameters in decoder: {jasper_decoder.num_weights}") - nemo.logging.info( - f"Total number of parameters in model: " f"{jasper_decoder.num_weights + jasper_encoder.num_weights}" - ) - nemo.logging.info('================================') + logging.info('================================') + logging.info(f"Number of parameters in encoder: {jasper_encoder.num_weights}") + logging.info(f"Number of parameters in decoder: {jasper_decoder.num_weights}") + logging.info(f"Total number of parameters in model: " f"{jasper_decoder.num_weights + jasper_encoder.num_weights}") + logging.info('================================') # Train DAG (audio_signal_t, a_sig_length_t, transcript_t, transcript_len_t,) = data_layer() @@ -239,7 +238,7 @@ def main(): checkpoint_dir = neural_factory.checkpoint_dir if args.local_rank is not None: - nemo.logging.info('Doing ALL GPU') + logging.info('Doing ALL GPU') # build dags train_loss, callbacks, steps_per_epoch = create_all_dags(args, neural_factory) diff --git a/examples/asr/jasper_aishell.py b/examples/asr/other/jasper_aishell.py similarity index 93% rename from examples/asr/jasper_aishell.py rename to examples/asr/other/jasper_aishell.py index 67bafeafdf00..0ee584507909 100644 --- a/examples/asr/jasper_aishell.py +++ b/examples/asr/other/jasper_aishell.py @@ -10,6 +10,7 @@ import nemo.collections.asr as nemo_asr import nemo.utils.argparse as nm_argparse from nemo.collections.asr.helpers import monitor_asr_train_progress, process_evaluation_batch, process_evaluation_epoch +from nemo.utils import logging from nemo.utils.lr_policies import SquareAnnealing @@ -96,7 +97,7 @@ def create_all_dags(args, neural_factory): N = len(data_layer) steps_per_epoch = int(N / (args.batch_size * args.num_gpus)) - nemo.logging.info('Have {0} examples to train on.'.format(N)) + logging.info('Have {0} examples to train on.'.format(N)) data_preprocessor = 
nemo_asr.AudioToMelSpectrogramPreprocessor( sample_rate=sample_rate, **jasper_params["AudioToMelSpectrogramPreprocessor"], @@ -130,7 +131,7 @@ def create_all_dags(args, neural_factory): data_layers_eval.append(data_layer_eval) else: - nemo.logging.warning("There were no val datasets passed") + logging.warning("There were no val datasets passed") jasper_encoder = nemo_asr.JasperEncoder( feat_in=jasper_params["AudioToMelSpectrogramPreprocessor"]["features"], **jasper_params["JasperEncoder"], @@ -144,13 +145,11 @@ def create_all_dags(args, neural_factory): greedy_decoder = nemo_asr.GreedyCTCDecoder() - nemo.logging.info('================================') - nemo.logging.info(f"Number of parameters in encoder: {jasper_encoder.num_weights}") - nemo.logging.info(f"Number of parameters in decoder: {jasper_decoder.num_weights}") - nemo.logging.info( - f"Total number of parameters in model: " f"{jasper_decoder.num_weights + jasper_encoder.num_weights}" - ) - nemo.logging.info('================================') + logging.info('================================') + logging.info(f"Number of parameters in encoder: {jasper_encoder.num_weights}") + logging.info(f"Number of parameters in decoder: {jasper_decoder.num_weights}") + logging.info(f"Total number of parameters in model: " f"{jasper_decoder.num_weights + jasper_encoder.num_weights}") + logging.info('================================') # Train DAG (audio_signal_t, a_sig_length_t, transcript_t, transcript_len_t,) = data_layer() @@ -242,7 +241,7 @@ def main(): checkpoint_dir = neural_factory.checkpoint_dir if args.local_rank is not None: - nemo.logging.info('Doing ALL GPU') + logging.info('Doing ALL GPU') # build dags train_loss, callbacks, steps_per_epoch = create_all_dags(args, neural_factory) diff --git a/examples/asr/jasper_aishell_infer.py b/examples/asr/other/jasper_aishell_infer.py similarity index 89% rename from examples/asr/jasper_aishell_infer.py rename to examples/asr/other/jasper_aishell_infer.py index 919355731493..1e44b8527e5f 100644 --- a/examples/asr/jasper_aishell_infer.py +++ b/examples/asr/other/jasper_aishell_infer.py @@ -9,6 +9,7 @@ import nemo import nemo.collections.asr as nemo_asr from nemo.collections.asr.helpers import post_process_predictions, post_process_transcripts, word_error_rate +from nemo.utils import logging def load_vocab(vocab_file): @@ -47,7 +48,7 @@ def main(): if args.local_rank is not None: if args.lm_path: raise NotImplementedError( - "Beam search decoder with LM does not currently support " "evaluation on multi-gpu." + "Beam search decoder with LM does not currently support evaluation on multi-gpu." 
) device = nemo.core.DeviceType.AllGpu else: @@ -62,7 +63,7 @@ def main(): ) if args.local_rank is not None: - nemo.logging.info('Doing ALL GPU') + logging.info('Doing ALL GPU') yaml = YAML(typ="safe") with open(args.model_config) as f: @@ -88,7 +89,7 @@ def main(): ) n = len(data_layer) - nemo.logging.info('Evaluating {0} examples'.format(n)) + logging.info('Evaluating {0} examples'.format(n)) data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor( sample_rate=sample_rate, **jasper_params["AudioToMelSpectrogramPreprocessor"], @@ -118,13 +119,11 @@ def main(): num_cpus=max(os.cpu_count(), 1), ) - nemo.logging.info('================================') - nemo.logging.info(f"Number of parameters in encoder: {jasper_encoder.num_weights}") - nemo.logging.info(f"Number of parameters in decoder: {jasper_decoder.num_weights}") - nemo.logging.info( - f"Total number of parameters in model: " f"{jasper_decoder.num_weights + jasper_encoder.num_weights}" - ) - nemo.logging.info('================================') + logging.info('================================') + logging.info(f"Number of parameters in encoder: {jasper_encoder.num_weights}") + logging.info(f"Number of parameters in decoder: {jasper_decoder.num_weights}") + logging.info(f"Total number of parameters in model: " f"{jasper_decoder.num_weights + jasper_encoder.num_weights}") + logging.info('================================') (audio_signal_e1, a_sig_length_e1, transcript_e1, transcript_len_e1,) = data_layer() processed_signal_e1, p_length_e1 = data_preprocessor(input_signal=audio_signal_e1, length=a_sig_length_e1) @@ -149,7 +148,7 @@ def main(): greedy_hypotheses = post_process_predictions(evaluated_tensors[1], vocab) references = post_process_transcripts(evaluated_tensors[2], evaluated_tensors[3], vocab) cer = word_error_rate(hypotheses=greedy_hypotheses, references=references, use_cer=True) - nemo.logging.info("Greedy CER {:.2f}%".format(cer * 100)) + logging.info("Greedy CER {:.2f}%".format(cer * 100)) if args.lm_path: beam_hypotheses = [] @@ -160,7 +159,7 @@ def main(): beam_hypotheses.append(j[0][1]) cer = word_error_rate(hypotheses=beam_hypotheses, references=references, use_cer=True) - nemo.logging.info("Beam CER {:.2f}".format(cer * 100)) + logging.info("Beam CER {:.2f}".format(cer * 100)) if args.save_logprob: # Convert logits to list of numpy arrays diff --git a/examples/asr/jasper_eval.py b/examples/asr/other/jasper_eval.py similarity index 70% rename from examples/asr/jasper_eval.py rename to examples/asr/other/jasper_eval.py index 9c5fac4eb36d..5ef4d4c51149 100644 --- a/examples/asr/jasper_eval.py +++ b/examples/asr/other/jasper_eval.py @@ -1,6 +1,17 @@ -# Copyright (c) 2019 NVIDIA Corporation -# some of the code taken from: -# https://github.com/NVIDIA/OpenSeq2Seq/blob/master/scripts/decode.py +# Copyright (C) NVIDIA CORPORATION. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License.** +""" some of the code taken from: https://github.com/NVIDIA/OpenSeq2Seq/blob/master/scripts/decode.py""" import argparse import copy import os @@ -12,6 +23,7 @@ import nemo import nemo.collections.asr as nemo_asr from nemo.collections.asr.helpers import post_process_predictions, post_process_transcripts, word_error_rate +from nemo.utils import logging def main(): @@ -63,7 +75,7 @@ def main(): if args.local_rank is not None: if args.lm_path: raise NotImplementedError( - "Beam search decoder with LM does not currently support " "evaluation on multi-gpu." + "Beam search decoder with LM does not currently support evaluation on multi-gpu." ) device = nemo.core.DeviceType.AllGpu else: @@ -78,7 +90,7 @@ def main(): ) if args.local_rank is not None: - nemo.logging.info('Doing ALL GPU') + logging.info('Doing ALL GPU') yaml = YAML(typ="safe") with open(args.model_config) as f: @@ -101,7 +113,7 @@ def main(): ) N = len(data_layer) - nemo.logging.info('Evaluating {0} examples'.format(N)) + logging.info('Evaluating {0} examples'.format(N)) data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor( sample_rate=sample_rate, **jasper_params["AudioToMelSpectrogramPreprocessor"] @@ -114,13 +126,11 @@ def main(): ) greedy_decoder = nemo_asr.GreedyCTCDecoder() - nemo.logging.info('================================') - nemo.logging.info(f"Number of parameters in encoder: {jasper_encoder.num_weights}") - nemo.logging.info(f"Number of parameters in decoder: {jasper_decoder.num_weights}") - nemo.logging.info( - f"Total number of parameters in model: " f"{jasper_decoder.num_weights + jasper_encoder.num_weights}" - ) - nemo.logging.info('================================') + logging.info('================================') + logging.info(f"Number of parameters in encoder: {jasper_encoder.num_weights}") + logging.info(f"Number of parameters in decoder: {jasper_decoder.num_weights}") + logging.info(f"Total number of parameters in model: " f"{jasper_decoder.num_weights + jasper_encoder.num_weights}") + logging.info('================================') # Define inference DAG audio_signal_e1, a_sig_length_e1, transcript_e1, transcript_len_e1 = data_layer() @@ -132,13 +142,22 @@ def main(): eval_tensors = [log_probs_e1, predictions_e1, transcript_e1, transcript_len_e1, encoded_len_e1] # inference - evaluated_tensors = neural_factory.infer(tensors=eval_tensors, checkpoint_dir=load_dir, cache=False) + evaluated_tensors = neural_factory.infer(tensors=eval_tensors, checkpoint_dir=load_dir) greedy_hypotheses = post_process_predictions(evaluated_tensors[1], vocab) references = post_process_transcripts(evaluated_tensors[2], evaluated_tensors[3], vocab) wer = word_error_rate(hypotheses=greedy_hypotheses, references=references) - nemo.logging.info("Greedy WER {:.2f}%".format(wer * 100)) + logging.info("Greedy WER {:.2f}%".format(wer * 100)) + + # Convert logits to list of numpy arrays + logprob = [] + for i, batch in enumerate(evaluated_tensors[0]): + for j in range(batch.shape[0]): + logprob.append(batch[j][: evaluated_tensors[4][i][j], :].cpu().numpy()) + if args.save_logprob: + with open(args.save_logprob, 'wb') as f: + pickle.dump(logprob, f, protocol=pickle.HIGHEST_PROTOCOL) # language model if args.lm_path: @@ -154,10 +173,11 @@ def main(): beam_wers = [] + logprobexp = [np.exp(p) for p in logprob] for alpha in np.arange(args.alpha, args.alpha_max, args.alpha_step): for beta in np.arange(args.beta, 
args.beta_max, args.beta_step): - nemo.logging.info('================================') - nemo.logging.info(f'Infering with (alpha, beta): ({alpha}, {beta})') + logging.info('================================') + logging.info(f'Infering with (alpha, beta): ({alpha}, {beta})') beam_search_with_lm = nemo_asr.BeamSearchDecoderWithLM( vocab=vocab, beam_width=args.beam_width, @@ -165,36 +185,22 @@ def main(): beta=beta, lm_path=args.lm_path, num_cpus=max(os.cpu_count(), 1), + input_tensor=False, ) - beam_predictions_e1 = beam_search_with_lm(log_probs=log_probs_e1, log_probs_length=encoded_len_e1) - - evaluated_tensors = neural_factory.infer(tensors=[beam_predictions_e1], use_cache=False, verbose=False) - - beam_hypotheses = [] - # Over mini-batch - for i in evaluated_tensors[-1]: - # Over samples - for j in i: - beam_hypotheses.append(j[0][1]) - lm_wer = word_error_rate(hypotheses=beam_hypotheses, references=references) - nemo.logging.info("Beam WER {:.2f}%".format(lm_wer * 100)) + + beam_predictions = beam_search_with_lm(log_probs=logprobexp, log_probs_length=None, force_pt=True) + + beam_predictions = [b[0][1] for b in beam_predictions[0]] + lm_wer = word_error_rate(hypotheses=beam_predictions, references=references) + logging.info("Beam WER {:.2f}%".format(lm_wer * 100)) beam_wers.append(((alpha, beta), lm_wer * 100)) - nemo.logging.info('Beam WER for (alpha, beta)') - nemo.logging.info('================================') - nemo.logging.info('\n' + '\n'.join([str(e) for e in beam_wers])) - nemo.logging.info('================================') + logging.info('Beam WER for (alpha, beta)') + logging.info('================================') + logging.info('\n' + '\n'.join([str(e) for e in beam_wers])) + logging.info('================================') best_beam_wer = min(beam_wers, key=lambda x: x[1]) - nemo.logging.info('Best (alpha, beta): ' f'{best_beam_wer[0]}, ' f'WER: {best_beam_wer[1]:.2f}%') - - if args.save_logprob: - # Convert logits to list of numpy arrays - logprob = [] - for i, batch in enumerate(evaluated_tensors[0]): - for j in range(batch.shape[0]): - logprob.append(batch[j][: evaluated_tensors[4][i][j], :].cpu().numpy()) - with open(args.save_logprob, 'wb') as f: - pickle.dump(logprob, f, protocol=pickle.HIGHEST_PROTOCOL) + logging.info('Best (alpha, beta): ' f'{best_beam_wer[0]}, ' f'WER: {best_beam_wer[1]:.2f}%') if __name__ == "__main__": diff --git a/examples/asr/quartznet.py b/examples/asr/other/quartznet.py similarity index 97% rename from examples/asr/quartznet.py rename to examples/asr/other/quartznet.py index ad6f76c134ba..9dbea554c78d 100644 --- a/examples/asr/quartznet.py +++ b/examples/asr/other/quartznet.py @@ -10,6 +10,7 @@ import nemo.collections.asr as nemo_asr import nemo.utils.argparse as nm_argparse from nemo.collections.asr.helpers import monitor_asr_train_progress, process_evaluation_batch, process_evaluation_epoch +from nemo.utils import logging from nemo.utils.lr_policies import CosineAnnealing @@ -34,7 +35,7 @@ def parse_args(): type=int, default=None, required=True, - help="number of epochs to train. You should specify" "either num_epochs or max_steps", + help="number of epochs to train. 
You should specify either num_epochs or max_steps", ) parser.add_argument( "--model_config", type=str, required=True, help="model configuration file: model.yaml", @@ -120,7 +121,7 @@ def create_all_dags(args, neural_factory): data_layers_eval.append(data_layer_eval) else: - nemo.logging.warning("There were no val datasets passed") + logging.warning("There were no val datasets passed") # create shared modules @@ -242,7 +243,7 @@ def main(): args.checkpoint_dir = neural_factory.checkpoint_dir if args.local_rank is not None: - nemo.logging.info('Doing ALL GPU') + logging.info('Doing ALL GPU') # build dags train_loss, callbacks, steps_per_epoch = create_all_dags(args, neural_factory) diff --git a/examples/asr/quartznet_speech_commands.py b/examples/asr/quartznet_speech_commands.py new file mode 100644 index 000000000000..7bcb9058974a --- /dev/null +++ b/examples/asr/quartznet_speech_commands.py @@ -0,0 +1,340 @@ +# Copyright (c) 2019 NVIDIA Corporation +import argparse +import copy +import glob +import math +import os +from datetime import datetime +from functools import partial + +from ruamel.yaml import YAML + +import nemo +import nemo.collections.asr as nemo_asr +import nemo.utils.argparse as nm_argparse +from nemo.collections.asr.helpers import ( + monitor_classification_training_progress, + process_classification_evaluation_batch, + process_classification_evaluation_epoch, +) +from nemo.utils import logging +from nemo.utils.lr_policies import CosineAnnealing, PolynomialDecayAnnealing, PolynomialHoldDecayAnnealing + + +def parse_args(): + parser = argparse.ArgumentParser( + parents=[nm_argparse.NemoArgParser()], description='Jasper Speech Commands', conflict_handler='resolve', + ) + parser.set_defaults( + checkpoint_dir=None, + optimizer="sgd", + batch_size=128, + eval_batch_size=128, + lr=0.1, + amp_opt_level="O1", + create_tb_writer=True, + ) + + # Overwrite default args + parser.add_argument( + "--max_steps", type=int, default=None, required=False, help="max number of steps to train", + ) + parser.add_argument( + "--num_epochs", type=int, default=None, required=False, help="number of epochs to train", + ) + parser.add_argument( + "--model_config", type=str, required=True, help="model configuration file: model.yaml", + ) + + # Create new args + parser.add_argument("--exp_name", default="Jasper_Speech_Commands", type=str) + parser.add_argument('--min_lr', default=1e-3, type=float) + parser.add_argument("--beta1", default=0.95, type=float) + parser.add_argument("--beta2", default=0.5, type=float) + parser.add_argument("--warmup_ratio", default=0.0, type=float) + parser.add_argument("--hold_ratio", default=0.0, type=float) + parser.add_argument( + "--load_dir", default=None, type=str, help="directory with pre-trained checkpoint", + ) + + args = parser.parse_args() + + if args.max_steps is not None and args.num_epochs is not None: + raise ValueError("Either max_steps or num_epochs should be provided.") + return args + + +def construct_name(name, lr, batch_size, max_steps, num_epochs, wd, optimizer, iter_per_step): + if max_steps is not None: + return "{0}-lr_{1}-bs_{2}-s_{3}-wd_{4}-opt_{5}-ips_{6}".format( + name, lr, batch_size, max_steps, wd, optimizer, iter_per_step + ) + else: + return "{0}-lr_{1}-bs_{2}-e_{3}-wd_{4}-opt_{5}-ips_{6}".format( + name, lr, batch_size, num_epochs, wd, optimizer, iter_per_step + ) + + +def create_all_dags(args, neural_factory): + yaml = YAML(typ="safe") + with open(args.model_config) as f: + jasper_params = yaml.load(f) + + labels = jasper_params['labels'] # 
Vocab of tokens + sample_rate = jasper_params['sample_rate'] + + # Calculate num_workers for dataloader + total_cpus = os.cpu_count() + cpu_per_traindl = max(int(total_cpus / neural_factory.world_size), 1) + + # perturb_config = jasper_params.get('perturb', None) + train_dl_params = copy.deepcopy(jasper_params["AudioToSpeechLabelDataLayer"]) + train_dl_params.update(jasper_params["AudioToSpeechLabelDataLayer"]["train"]) + del train_dl_params["train"] + del train_dl_params["eval"] + # del train_dl_params["normalize_transcripts"] + + # Look for augmentations + audio_augmentor = jasper_params.get('AudioAugmentor', None) + + data_layer = nemo_asr.AudioToSpeechLabelDataLayer( + manifest_filepath=args.train_dataset, + labels=labels, + sample_rate=sample_rate, + batch_size=args.batch_size, + num_workers=cpu_per_traindl, + augmentor=audio_augmentor, + **train_dl_params, + ) + + crop_pad_augmentation = nemo_asr.CropOrPadSpectrogramAugmentation(audio_length=128) + + N = len(data_layer) + steps_per_epoch = math.ceil(N / (args.batch_size * args.iter_per_step * args.num_gpus)) + logging.info('Steps per epoch : {0}'.format(steps_per_epoch)) + logging.info('Have {0} examples to train on.'.format(N)) + + data_preprocessor = nemo_asr.AudioToMFCCPreprocessor( + sample_rate=sample_rate, **jasper_params["AudioToMFCCPreprocessor"], + ) + + spectr_augment_config = jasper_params.get('SpectrogramAugmentation', None) + if spectr_augment_config: + data_spectr_augmentation = nemo_asr.SpectrogramAugmentation(**spectr_augment_config) + + eval_dl_params = copy.deepcopy(jasper_params["AudioToSpeechLabelDataLayer"]) + eval_dl_params.update(jasper_params["AudioToSpeechLabelDataLayer"]["eval"]) + del eval_dl_params["train"] + del eval_dl_params["eval"] + data_layers_eval = [] + + if args.eval_datasets: + for eval_datasets in args.eval_datasets: + data_layer_eval = nemo_asr.AudioToSpeechLabelDataLayer( + manifest_filepath=eval_datasets, + sample_rate=sample_rate, + labels=labels, + batch_size=args.eval_batch_size, + num_workers=cpu_per_traindl, + **eval_dl_params, + ) + + data_layers_eval.append(data_layer_eval) + else: + logging.warning("There were no val datasets passed") + + jasper_encoder = nemo_asr.JasperEncoder(**jasper_params["JasperEncoder"],) + + jasper_decoder = nemo_asr.JasperDecoderForClassification( + feat_in=jasper_params["JasperEncoder"]["jasper"][-1]["filters"], + num_classes=len(labels), + **jasper_params['JasperDecoderForClassification'], + ) + + ce_loss = nemo_asr.CrossEntropyLossNM() + + logging.info('================================') + logging.info(f"Number of parameters in encoder: {jasper_encoder.num_weights}") + logging.info(f"Number of parameters in decoder: {jasper_decoder.num_weights}") + logging.info(f"Total number of parameters in model: " f"{jasper_decoder.num_weights + jasper_encoder.num_weights}") + logging.info('================================') + + # Train DAG + # --- Assemble Training DAG --- # + audio_signal, audio_signal_len, commands, command_len = data_layer() + + processed_signal, processed_signal_len = data_preprocessor(input_signal=audio_signal, length=audio_signal_len) + + processed_signal, processed_signal_len = crop_pad_augmentation( + input_signal=processed_signal, length=audio_signal_len + ) + + if spectr_augment_config: + processed_signal = data_spectr_augmentation(input_spec=processed_signal) + + encoded, encoded_len = jasper_encoder(audio_signal=processed_signal, length=processed_signal_len) + + decoded = jasper_decoder(encoder_output=encoded) + + loss = 
ce_loss(logits=decoded, labels=commands) + + # Callbacks needed to print info to console and Tensorboard + train_callback = nemo.core.SimpleLossLoggerCallback( + # Notice that we pass in loss, predictions, and the labels (commands). + # Of course we would like to see our training loss, but we need the + # other arguments to calculate the accuracy. + tensors=[loss, decoded, commands], + # The print_func defines what gets printed. + print_func=partial(monitor_classification_training_progress, eval_metric=None), + get_tb_values=lambda x: [("loss", x[0])], + tb_writer=neural_factory.tb_writer, + ) + + chpt_callback = nemo.core.CheckpointCallback( + folder=neural_factory.checkpoint_dir, load_from_folder=args.load_dir, step_freq=args.checkpoint_save_freq, + ) + + callbacks = [train_callback, chpt_callback] + + # assemble eval DAGs + for i, eval_dl in enumerate(data_layers_eval): + # --- Assemble Training DAG --- # + test_audio_signal, test_audio_signal_len, test_commands, test_command_len = eval_dl() + + test_processed_signal, test_processed_signal_len = data_preprocessor( + input_signal=test_audio_signal, length=test_audio_signal_len + ) + + test_processed_signal, test_processed_signal_len = crop_pad_augmentation( + input_signal=test_processed_signal, length=test_processed_signal_len + ) + + test_encoded, test_encoded_len = jasper_encoder( + audio_signal=test_processed_signal, length=test_processed_signal_len + ) + + test_decoded = jasper_decoder(encoder_output=test_encoded) + + test_loss = ce_loss(logits=test_decoded, labels=test_commands) + + # create corresponding eval callback + tagname = os.path.basename(args.eval_datasets[i]).split(".")[0] + eval_callback = nemo.core.EvaluatorCallback( + eval_tensors=[test_loss, test_decoded, test_commands], + user_iter_callback=partial(process_classification_evaluation_batch, top_k=1), + user_epochs_done_callback=partial(process_classification_evaluation_epoch, eval_metric=1, tag=tagname), + eval_step=args.eval_freq, # How often we evaluate the model on the test set + tb_writer=neural_factory.tb_writer, + ) + + callbacks.append(eval_callback) + return loss, callbacks, steps_per_epoch + + +def main(): + args = parse_args() + name = construct_name( + args.exp_name, + args.lr, + args.batch_size, + args.max_steps, + args.num_epochs, + args.weight_decay, + args.optimizer, + args.iter_per_step, + ) + + # time stamp + date_time = datetime.now().strftime("%m-%d-%Y -- %H-%M-%S") + + log_dir = name + if args.work_dir: + log_dir = os.path.join(args.work_dir, name) + + if args.tensorboard_dir is None: + tensorboard_dir = os.path.join(name, 'tensorboard', date_time) + else: + tensorboard_dir = args.tensorboard_dir + + if args.checkpoint_dir is None: + checkpoint_dir = os.path.join(name, date_time) + else: + base_checkpoint_dir = args.checkpoint_dir + if len(glob.glob(os.path.join(base_checkpoint_dir, '*.pt'))) > 0: + checkpoint_dir = base_checkpoint_dir + else: + checkpoint_dir = os.path.join(args.checkpoint_dir, date_time) + + # instantiate Neural Factory with supported backend + neural_factory = nemo.core.NeuralModuleFactory( + backend=nemo.core.Backend.PyTorch, + local_rank=args.local_rank, + optimization_level=args.amp_opt_level, + log_dir=log_dir, + checkpoint_dir=checkpoint_dir, + create_tb_writer=args.create_tb_writer, + files_to_copy=[args.model_config, __file__], + cudnn_benchmark=args.cudnn_benchmark, + tensorboard_dir=tensorboard_dir, + ) + args.num_gpus = neural_factory.world_size + + if args.local_rank is not None: + logging.info('Doing ALL GPU') + + # 
build dags + train_loss, callbacks, steps_per_epoch = create_all_dags(args, neural_factory) + + yaml = YAML(typ="safe") + with open(args.model_config) as f: + jasper_params = yaml.load(f) + + lr_schedule = jasper_params.get('lr_schedule', 'CosineAnnealing') + + if lr_schedule == 'CosineAnnealing': + lr_policy = CosineAnnealing( + total_steps=args.max_steps if args.max_steps is not None else args.num_epochs * steps_per_epoch, + warmup_ratio=args.warmup_ratio, + min_lr=args.min_lr, + ) + elif lr_schedule == 'PolynomialDecayAnnealing': + lr_policy = PolynomialDecayAnnealing( + total_steps=args.max_steps if args.max_steps is not None else args.num_epochs * steps_per_epoch, + warmup_ratio=args.warmup_ratio, + min_lr=args.min_lr, + power=2.0, + ) + elif lr_schedule == 'PolynomialHoldDecayAnnealing': + lr_policy = PolynomialHoldDecayAnnealing( + total_steps=args.max_steps if args.max_steps is not None else args.num_epochs * steps_per_epoch, + warmup_ratio=args.warmup_ratio, + hold_ratio=args.hold_ratio, + min_lr=args.min_lr, + power=2.0, + ) + else: + raise ValueError("LR schedule is invalid !") + + logging.info(f"Using `{lr_policy}` Learning Rate Scheduler") + + # train model + neural_factory.train( + tensors_to_optimize=[train_loss], + callbacks=callbacks, + lr_policy=lr_policy, + optimizer=args.optimizer, + optimization_params={ + "num_epochs": args.num_epochs, + "max_steps": args.max_steps, + "lr": args.lr, + "momentum": 0.95, + "betas": (args.beta1, args.beta2), + "weight_decay": args.weight_decay, + "grad_norm_clip": None, + }, + batches_per_step=args.iter_per_step, + ) + + +if __name__ == '__main__': + main() diff --git a/examples/asr/speech2text.py b/examples/asr/speech2text.py new file mode 100644 index 000000000000..f1c07c17d8bf --- /dev/null +++ b/examples/asr/speech2text.py @@ -0,0 +1,171 @@ +# Copyright (c) 2019-, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +from argparse import ArgumentParser +from functools import partial + +import nemo +import nemo.collections.asr as nemo_asr +from nemo.collections.asr.helpers import monitor_asr_train_progress, process_evaluation_batch, process_evaluation_epoch +from nemo.utils import logging +from nemo.utils.lr_policies import CosineAnnealing + + +def main(): + # Usage and Command line arguments + parser = ArgumentParser() + parser.add_argument( + "--asr_model", + type=str, + default="QuartzNet15x5-En", + required=True, + help="Pass: 'QuartzNet15x5', 'QuartzNet15x5-Zh', or 'JasperNet10x5-En' to train from pre-trained models. 
To train from scratch pass path to modelfile ending with .yaml.", + ) + parser.add_argument( + "--amp_opt_level", + default="O0", + type=str, + choices=["O0", "O1", "O2", "O3"], + help="See: https://nvidia.github.io/apex/amp.html", + ) + parser.add_argument("--train_dataset", type=str, required=True, default=None, help="training dataset path") + parser.add_argument("--eval_datasets", type=str, nargs="*", help="evaluation datasets paths") + parser.add_argument("--eval_freq", default=1000, type=int, help="Evaluation frequency") + parser.add_argument("--eval_batch_size", type=int, default=8, help="batch size to use for evaluation") + parser.add_argument("--local_rank", default=None, type=int, help="node rank for distributed training") + parser.add_argument("--stats_freq", default=25, type=int, help="frequency with which to update train stats") + parser.add_argument("--checkpoint_dir", default=None, type=str, help="Folder where to save checkpoints") + parser.add_argument("--checkpoint_save_freq", required=False, type=int, help="how often to checkpoint") + parser.add_argument("--optimizer", default="novograd", type=str) + parser.add_argument("--warmup_ratio", default=0.02, type=float, help="learning rate warmup ratio") + parser.add_argument("--batch_size", required=True, type=int, help="train batch size per GPU") + parser.add_argument("--num_epochs", default=5, type=int, help="number of epochs to train") + parser.add_argument("--lr", default=0.01, type=float) + parser.add_argument("--beta1", default=0.95, type=float) + parser.add_argument("--beta2", default=0.5, type=float) + parser.add_argument("--weight_decay", default=0.001, type=float) + parser.add_argument("--iter_per_step", default=1, type=int, help="number of grad accumulations per batch") + parser.add_argument("--wandb_exp_name", default=None, type=str) + parser.add_argument("--wandb_project", default=None, type=str) + parser.add_argument("--max_train_audio_len", default=16.7, type=float, help="max audio length") + parser.add_argument("--trim_silence", default=True, type=bool, help="trim audio from silence or not") + args = parser.parse_args() + + # Setup NeuralModuleFactory to control training + # instantiate Neural Factory with supported backend + nf = nemo.core.NeuralModuleFactory( + local_rank=args.local_rank, # This is necessary for distributed training + optimization_level=args.amp_opt_level, # This is necessary for mixed precision optimization + cudnn_benchmark=True, + ) + + # Instantiate the model which we'll train + if args.asr_model.endswith('.yaml'): + logging.info(f"Speech2Text: Will train from scratch using config from {args.asr_model}") + asr_model = nemo_asr.models.ASRConvCTCModel.import_from_config(args.asr_model) + else: + logging.info(f"Speech2Text: Will fine-tune from {args.asr_model}") + asr_model = nemo_asr.models.ASRConvCTCModel.from_pretrained( + model_info=args.asr_model, local_rank=args.local_rank + ) + logging.info("\n\n") + logging.info(f"Speech2Text: Training on {nf.world_size} GPUs.") + logging.info(f"Training {type(asr_model)} model.") + logging.info(f"Training CTC model with alphabet {asr_model.vocabulary}.") + logging.info(f"Training CTC model with {asr_model.num_weights} weights.\n\n") + + train_data_layer = nemo_asr.AudioToTextDataLayer( + manifest_filepath=args.train_dataset, + labels=asr_model.vocabulary, + batch_size=args.batch_size, + trim_silence=args.trim_silence, + max_duration=args.max_train_audio_len, + shuffle=True, + ) + ctc_loss = nemo_asr.CTCLossNM(num_classes=len(asr_model.vocabulary)) + 
greedy_decoder = nemo_asr.GreedyCTCDecoder() + + audio_signal, audio_signal_len, transcript, transcript_len = train_data_layer() + log_probs, encoded_len = asr_model(input_signal=audio_signal, length=audio_signal_len) + predictions = greedy_decoder(log_probs=log_probs) + loss = ctc_loss(log_probs=log_probs, targets=transcript, input_length=encoded_len, target_length=transcript_len) + + # Callbacks which we'll be using: + callbacks = [] + # SimpleLossLogger prints basic training stats (e.g. loss) to console + train_callback = nemo.core.SimpleLossLoggerCallback( + tensors=[loss, predictions, transcript, transcript_len], + step_freq=args.stats_freq, + print_func=partial(monitor_asr_train_progress, labels=asr_model.vocabulary), + ) + callbacks.append(train_callback) + if args.checkpoint_dir is not None and args.checkpoint_save_freq is not None: + # Checkpoint callback saves checkpoints periodically + checkpointer_callback = nemo.core.CheckpointCallback( + folder=args.checkpoint_dir, step_freq=args.checkpoint_save_freq + ) + callbacks.append(checkpointer_callback) + + if args.wandb_exp_name is not None and args.wandb_project is not None: + # WandbCallback saves stats to Weights&Biases + wandb_callback = nemo.core.WandBLogger( + step_freq=args.stats_freq, wandb_name=args.wandb_exp_name, wandb_project=args.wandb_project, args=args + ) + callbacks.append(wandb_callback) + + # Evaluation + if args.eval_datasets is not None and args.eval_freq is not None: + asr_model.eval() # switch model to evaluation mode + logging.info(f"Will perform evaluation every {args.eval_freq} steps.") + for ind, eval_dataset in enumerate(args.eval_datasets): + eval_data_layer = nemo_asr.AudioToTextDataLayer( + manifest_filepath=eval_dataset, labels=asr_model.vocabulary, batch_size=args.eval_batch_size + ) + audio_signal, audio_signal_len, transcript, transcript_len = eval_data_layer() + log_probs, encoded_len = asr_model(input_signal=audio_signal, length=audio_signal_len) + eval_predictions = greedy_decoder(log_probs=log_probs) + eval_loss = ctc_loss( + log_probs=log_probs, targets=transcript, input_length=encoded_len, target_length=transcript_len + ) + tag_name = os.path.basename(eval_dataset).split(".")[0] + eval_callback = nemo.core.EvaluatorCallback( + eval_tensors=[eval_loss, eval_predictions, transcript, transcript_len], + user_iter_callback=partial(process_evaluation_batch, labels=asr_model.vocabulary), + user_epochs_done_callback=partial(process_evaluation_epoch, tag=tag_name), + eval_step=args.eval_freq, + wandb_name=args.wandb_exp_name, + wandb_project=args.wandb_project, + ) + callbacks.append(eval_callback) + + steps_in_epoch = len(train_data_layer) / (args.batch_size * args.iter_per_step * nf.world_size) + lr_policy = CosineAnnealing(total_steps=args.num_epochs * steps_in_epoch, warmup_ratio=args.warmup_ratio) + + nf.train( + tensors_to_optimize=[loss], + callbacks=callbacks, + optimizer=args.optimizer, + optimization_params={ + "num_epochs": args.num_epochs, + "lr": args.lr, + "betas": (args.beta1, args.beta2), + "weight_decay": args.weight_decay, + }, + batches_per_step=args.iter_per_step, + lr_policy=lr_policy, + ) + + +if __name__ == '__main__': + main() diff --git a/examples/asr/speech2text_infer.py b/examples/asr/speech2text_infer.py new file mode 100644 index 000000000000..e041ef0f4cb9 --- /dev/null +++ b/examples/asr/speech2text_infer.py @@ -0,0 +1,81 @@ +# Copyright (c) 2019-, NVIDIA CORPORATION. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from argparse import ArgumentParser + +import nemo +import nemo.collections.asr as nemo_asr +from nemo.collections.asr.helpers import post_process_predictions, post_process_transcripts, word_error_rate +from nemo.utils import logging + + +def main(): + # Usage and Command line arguments + parser = ArgumentParser() + parser.add_argument( + "--asr_model", + type=str, + default="QuartzNet15x5-En", + required=True, + help="Pass: 'QuartzNet15x5-En', 'QuartzNet15x5-Zh', or 'JasperNet10x5-En'", + ) + parser.add_argument("--dataset", type=str, required=True, help="path to evaluation data") + parser.add_argument("--eval_batch_size", type=int, default=1, help="batch size to use for evaluation") + parser.add_argument("--wer_target", type=float, default=None, help="used by test") + parser.add_argument("--trim_silence", default=True, type=bool, help="trim audio from silence or not") + args = parser.parse_args() + + # Setup NeuralModuleFactory to control training + # instantiate Neural Factory with supported backend + nf = nemo.core.NeuralModuleFactory() + + # Instantiate the model which we'll train + logging.info(f"Speech2Text: Will fine-tune from {args.asr_model}") + asr_model = nemo_asr.models.ASRConvCTCModel.from_pretrained(model_info=args.asr_model) + asr_model.eval() + + logging.info("\n\n") + logging.info(f"Evaluation using {type(asr_model)} model.") + logging.info(f"Evaluation using alphabet {asr_model.vocabulary}.") + logging.info(f"The model has {asr_model.num_weights} weights.\n\n") + + eval_data_layer = nemo_asr.AudioToTextDataLayer( + manifest_filepath=args.dataset, + labels=asr_model.vocabulary, + batch_size=args.eval_batch_size, + trim_silence=args.trim_silence, + shuffle=False, + ) + greedy_decoder = nemo_asr.GreedyCTCDecoder() + + audio_signal, audio_signal_len, transcript, transcript_len = eval_data_layer() + log_probs, encoded_len = asr_model(input_signal=audio_signal, length=audio_signal_len) + predictions = greedy_decoder(log_probs=log_probs) + + # inference + eval_tensors = [log_probs, predictions, transcript, transcript_len, encoded_len] + evaluated_tensors = nf.infer(tensors=eval_tensors) + + greedy_hypotheses = post_process_predictions(evaluated_tensors[1], asr_model.vocabulary) + references = post_process_transcripts(evaluated_tensors[2], evaluated_tensors[3], asr_model.vocabulary) + + wer = word_error_rate(hypotheses=greedy_hypotheses, references=references) + logging.info("Greedy WER {:.2f}%".format(wer * 100)) + if args.wer_target is not None: + if args.wer_target < wer: + raise ValueError(f"Resulting WER {wer} is higher than the target {args.wer_target}") + + +if __name__ == '__main__': + main() diff --git a/examples/image/gan.py b/examples/image/gan.py index 08c43899ef21..42ed3cb0ac90 100644 --- a/examples/image/gan.py +++ b/examples/image/gan.py @@ -9,6 +9,7 @@ import nemo import nemo.collections.simple_gan as nemo_simple_gan from nemo.backends.pytorch.torchvision.helpers import compute_accuracy, eval_epochs_done_callback, 
eval_iter_callback +from nemo.utils import logging parser = argparse.ArgumentParser(description='MNIST') parser.add_argument("--local_rank", default=None, type=int) @@ -106,10 +107,10 @@ def put_tensor_in_dict(tensors, global_vars): def print_losses(tensors): g_loss, i_loss, r_loss, grad_p = tensors - nemo.logging.info(f"Generator Loss: {g_loss}") - nemo.logging.info(f"Interpolated Loss: {i_loss}") - nemo.logging.info(f"Real Loss: {r_loss}") - nemo.logging.info(f"Grad Penalty: {grad_p}") + logging.info(f"Generator Loss: {g_loss}") + logging.info(f"Interpolated Loss: {i_loss}") + logging.info(f"Real Loss: {r_loss}") + logging.info(f"Grad Penalty: {grad_p}") def get_tb_name_value(tensors): diff --git a/examples/image/transfer_learning.py b/examples/image/transfer_learning.py index bb3d54fe837c..206104d2404f 100644 --- a/examples/image/transfer_learning.py +++ b/examples/image/transfer_learning.py @@ -8,8 +8,7 @@ import nemo from nemo.backends.pytorch.torchvision.helpers import compute_accuracy, eval_epochs_done_callback, eval_iter_callback - -logging = nemo.logging +from nemo.utils import logging sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../..'))) diff --git a/examples/neural_graphs/img/neural_graphs_general.png b/examples/neural_graphs/img/neural_graphs_general.png new file mode 100644 index 000000000000..996e3db26e3d Binary files /dev/null and b/examples/neural_graphs/img/neural_graphs_general.png differ diff --git a/examples/neural_graphs/img/neural_graphs_nesting.png b/examples/neural_graphs/img/neural_graphs_nesting.png new file mode 100644 index 000000000000..c411587714b8 Binary files /dev/null and b/examples/neural_graphs/img/neural_graphs_nesting.png differ diff --git a/examples/neural_graphs/neural_graph_advanced.ipynb b/examples/neural_graphs/neural_graph_advanced.ipynb new file mode 100644 index 000000000000..fd8a0b955dc9 --- /dev/null +++ b/examples/neural_graphs/neural_graph_advanced.ipynb @@ -0,0 +1,379 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# =============================================================================\n", + "# Copyright (c) 2020 NVIDIA. 
All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# =============================================================================\n", + "\n", + "from functools import partial\n", + "from os.path import expanduser, join, abspath, dirname, exists\n", + "import tarfile\n", + "\n", + "from ruamel.yaml import YAML\n", + "\n", + "import nemo\n", + "import nemo.collections.asr as nemo_asr\n", + "from nemo.collections.asr.helpers import monitor_asr_train_progress\n", + "from nemo.core import NeuralGraph, OperationMode, DeviceType, SimpleLossLoggerCallback\n", + "from nemo.utils import logging\n", + "from nemo.utils.app_state import AppState\n", + "\n", + "# Create Neural(Module)Factory, use CPU.\n", + "nf = nemo.core.NeuralModuleFactory(placement=DeviceType.CPU)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Tutorial II: The advanced functionality\n", + "\n", + "In this first part of the Neural Graphs (NGs) tutorial we will focus on a more complex example: training of an End-to-End Convolutional Neural Acoustic Model called JASPER. We will build a \"model graph\" and show how we can nest it into another graphs, how we can freeze/unfreeze modules, use graph configuration and save/load graph checkpoints.\n", + "\n", + "#### This part covers the following:\n", + " * how to nest one graph into another\n", + " * how to serialize and deserialize a graph\n", + " * how to export and import serialized graph configuration to/from YAML files\n", + " * how to save and load graph checkpoints (containing weights of the Trainable NMs)\n", + " * how to freeze/unfreeze modules in a graph\n", + " \n", + "Additionally, we will show how use `AppState` to list all the modules and graphs we have created in the scope of our application.\n", + "In order to learn more about graph nesting and input/output binding please refer to the first part of the tutorial.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Prepare the samples for training JASPER - we will use the data available in NeMo tests.\n", + "data_folder = abspath(\"../../tests/data/\")\n", + "logging.info(\"Looking up for test ASR data\")\n", + "if not exists(join(data_folder, \"asr\")):\n", + " logging.info(\"Extracting ASR data to: {0}\".format(join(data_folder, \"asr\")))\n", + " tar = tarfile.open(join(data_folder, \"asr.tar.gz\"), \"r:gz\")\n", + " tar.extractall(path=data_folder)\n", + " tar.close()\n", + "else:\n", + " logging.info(\"ASR data found in: {0}\".format(join(data_folder, \"asr\")))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Set paths to model configuration, manifest and sample files.\n", + "model_config_file = abspath(\"../asr/configs/jasper_an4.yaml\")\n", + "manifest_path = join(data_folder, 'asr/tarred_an4/tarred_audio_manifest.json')\n", + "tarpath = join(data_folder, 
'asr/tarred_an4/audio_1.tar')\n", + "\n", + "# Open the model config file and get vocabulary.\n", + "yaml = YAML(typ=\"safe\")\n", + "with open(expanduser(model_config_file)) as f:\n", + " config = yaml.load(f)\n", + " \n", + "# Get labels (vocabulary).\n", + "vocab = config['labels']\n", + "vocab_len = len(vocab)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Instantiate DataLayer that can load the tarred samples.\n", + "data_layer = nemo_asr.TarredAudioToTextDataLayer(\n", + " audio_tar_filepaths=tarpath, manifest_filepath=manifest_path, labels=vocab, batch_size=16)\n", + "logging.info(\"Loaded {} samples that we will use for training\".format(len(data_layer)))\n", + "\n", + "# Create rest of the modules using the Neural Module deserialization feature.\n", + "data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor.deserialize(config[\"AudioToMelSpectrogramPreprocessor\"])\n", + "\n", + "jasper_encoder = nemo_asr.JasperEncoder.deserialize(config[\"JasperEncoder\"])\n", + "jasper_decoder = nemo_asr.JasperDecoderForCTC.deserialize(\n", + " config[\"JasperDecoderForCTC\"], overwrite_params={\"num_classes\": vocab_len}\n", + ")\n", + "ctc_loss = nemo_asr.CTCLossNM(num_classes=vocab_len)\n", + "greedy_decoder = nemo_asr.GreedyCTCDecoder()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create the Jasper \"model\" graph.\n", + "with NeuralGraph(operation_mode=OperationMode.both, name=\"jasper_model\") as jasper_model:\n", + " # Copy one input port definitions - using \"user\" port names.\n", + " jasper_model.inputs[\"input\"] = data_preprocessor.input_ports[\"input_signal\"]\n", + " # Bind selected inputs - bind other using the default port name.\n", + " i_processed_signal, i_processed_signal_len = data_preprocessor(input_signal=jasper_model.inputs[\"input\"], length=jasper_model)\n", + " i_encoded, i_encoded_len = jasper_encoder(audio_signal=i_processed_signal, length=i_processed_signal_len)\n", + " i_log_probs = jasper_decoder(encoder_output=i_encoded)\n", + " # Bind selected outputs - using \"user\" port names.\n", + " jasper_model.outputs[\"log_probs\"] = i_log_probs\n", + " jasper_model.outputs[\"encoded_len\"] = i_encoded_len\n", + "\n", + "# Print the summary.\n", + "logging.info(jasper_model.summary())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Serialize the whole graph.\n", + "serialized_jasper = jasper_model.serialize()\n", + "logging.info(\"Serialized JASPER model:\\n {}\".format(serialized_jasper))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# You can also serialize/deserialize a single NeuralModule, e.g. 
a decoder.\n", + "logging.info(\"Serialized JASPER Decoder:\\n {}\".format(jasper_decoder.serialize()))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# We can also export the serialized configuration to a file.\n", + "jasper_model.export_to_config(\"my_jasper.yml\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Display the lists of graph and modules.\n", + "logging.info(AppState().graphs.summary())\n", + "logging.info(AppState().modules.summary())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Deserialize graph - create a copy of the JASPER \"model\".\n", + "# Please note that the modules exist, so we must enable the graph to \"reuse\" them.\n", + "# (Commenting out reuse_existing_modules will raise a KeyError.)\n", + "jasper_copy = NeuralGraph.deserialize(serialized_jasper, reuse_existing_modules=True)\n", + "serialized_jasper_copy = jasper_copy.serialize()\n", + "assert serialized_jasper == serialized_jasper_copy # THE SAME! Please note name of the graph is not exported." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Alternativelly, import a copy of the JASPER \"model\" from config.\n", + "jasper_copy = NeuralGraph.import_from_config(\"my_jasper.yml\", reuse_existing_modules=True, name=\"jasper_copy\")\n", + "\n", + "# Print the summary.\n", + "logging.info(jasper_copy.summary())\n", + "\n", + "# Display list of graph and modules\n", + "logging.info(AppState().graphs.summary())\n", + "logging.info(AppState().modules.summary())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that there are two graphs in the \"Graph Registry\", yet the list of modules haven't changed. This means that both graphs are spanned on the same list of modules." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create the \"training\" graph.\n", + "with NeuralGraph(operation_mode=OperationMode.training) as training_graph:\n", + " # Create the \"implicit\" training graph.\n", + " o_audio_signal, o_audio_signal_len, o_transcript, o_transcript_len = data_layer()\n", + " # Use Jasper module as any other neural module.\n", + " o_log_probs, o_encoded_len = jasper_copy(input=o_audio_signal, length=o_audio_signal_len)\n", + " o_predictions = greedy_decoder(log_probs=o_log_probs)\n", + " o_loss = ctc_loss(\n", + " log_probs=o_log_probs, targets=o_transcript, input_length=o_encoded_len, target_length=o_transcript_len\n", + " )\n", + " # Set the graph output.\n", + " training_graph.outputs[\"o_loss\"] = o_loss\n", + "\n", + "# Print the summary.\n", + "logging.info(training_graph.summary())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create a simple loss callback.\n", + "loss_callback = nemo.core.SimpleLossLoggerCallback(\n", + " tensors=[training_graph.output_tensors[\"o_loss\"]],\n", + " print_func=lambda x: logging.info(f'Train Loss: {str(x[0].item())}'), step_freq=1\n", + ")\n", + "# Train the graph.\n", + "nf.train(\n", + " training_graph=training_graph,\n", + " optimizer=\"novograd\",\n", + " callbacks=[loss_callback],\n", + " optimization_params={\"max_steps\": 5, \"lr\": 0.01},\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Please note that the loss is going down. Still, we use only 65 samples, so we cannot really expect the model to be useful;)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Finally, I can save the graph checkpoint!\n", + "# Note that optionally you can indicate the names of the modules to be saved.\n", + "jasper_copy.save_to(\"my_jasper.chkpt\")#, module_names=[\"jasperencoder0\"])\n", + "# Please note only \"trainable\" modules will be saved." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# We can also save the whole training graph - which in this case will result in the same checkpoint...\n", + "training_graph.export_to_config(\"my_whole_graph.yml\")\n", + "training_graph.save_to(\"my_whole_graph.chkpt\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Finally, I can load everything and continue training.\n", + "new_training_graph = NeuralGraph.import_from_config(\"my_whole_graph.yml\", reuse_existing_modules=True)\n", + "\n", + "# Let's restore only the encoder\n", + "new_training_graph.restore_from(\"my_whole_graph.chkpt\", module_names=[\"jasperencoder0\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# So let us freeze the whole graph...\n", + "training_graph.freeze() #we can also freeze a subset, using \"module_names=[]\"\"\n", + "# ... 
and finetune only the decoder.\n", + "training_graph.unfreeze(module_names=[\"jasperdecoderforctc0\"])\n", + "\n", + "# Ok, let us see what the graph looks like now.\n", + "logging.info(training_graph.summary())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# Create a new simple callback using graph outputs \"o_loss\".\n", + "loss_callback = nemo.core.SimpleLossLoggerCallback(\n", + " tensors=[new_training_graph.output_tensors[\"o_loss\"]],\n", + " print_func=lambda x: logging.info(f'Train Loss: {str(x[0].item())}'), step_freq=1\n", + ")\n", + "\n", + "# And continue training...\n", + "nf.reset_trainer()\n", + "nf.train(\n", + " training_graph=new_training_graph,\n", + " optimizer=\"novograd\",\n", + " callbacks=[loss_callback],\n", + " optimization_params={\"max_steps\": 5, \"lr\": 0.01},\n", + ")\n", + "# Please note that this will throw an error if you will freeze all the trainable modules!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "nemo-env", + "language": "python", + "name": "nemo-env" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/neural_graphs/neural_graph_basic.ipynb b/examples/neural_graphs/neural_graph_basic.ipynb new file mode 100644 index 000000000000..d69ce6861a5d --- /dev/null +++ b/examples/neural_graphs/neural_graph_basic.ipynb @@ -0,0 +1,296 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# =============================================================================\n", + "# Copyright (c) 2020 NVIDIA. 
All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# =============================================================================\n", + "\n", + "import torch\n", + "\n", + "from nemo.backends.pytorch.tutorials import MSELoss, RealFunctionDataLayer, TaylorNet\n", + "from nemo.core import (\n", + " DeviceType,\n", + " EvaluatorCallback,\n", + " NeuralGraph,\n", + " NeuralModuleFactory,\n", + " OperationMode,\n", + " SimpleLossLoggerCallback,\n", + ")\n", + "from nemo.utils import logging\n", + "from nemo.utils.app_state import AppState\n", + "\n", + "# Create Neural(Module)Factory, use CPU.\n", + "nf = NeuralModuleFactory(placement=DeviceType.CPU)" + ] + }, + { + "attachments": { + "neural_graphs_general.png": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAA5wAAAHPCAYAAAA/N9WhAAAKx2lDQ1BJQ0MgUHJvZmlsZQAASImVlwdYU1kWgO97L73QAhGQEnpHepUSeiiCdLARkkBCiTEhqNgQEUdwLKiIoCLgoICCYwFkLIgF2yCoiH2CDCrqOFjAgso+YAkzu9/ufnved3L/d3LuOefe7958JwBQCWyRKANWAiBTmCWODPRhxCckMvC/AwSQgAogADybIxExIyJCASpT499l5C6AxsfbVuOx/v37/yrKXJ6EAwAUgXIyV8LJRPkEqm84InEWAMgB1G6wNEs0zldQVhWjBaL8aJxTJ3lonJMnGIOZ8ImO9EVZHQAChc0WpwJAMUTtjGxOKhqH4oeyjZArEKKMvgNPDp/NRRnNCywzMxePswxl0+S/xEn9W8xkeUw2O1XOk2uZEIKfQCLKYC//P7fjf0tmhnQqhzGqFL44KBId6eie3UtfHCJnYfKc8CkWcCf8J5gvDYqZYo7EN3GKuWy/EPncjDmhU5wiCGDJ42SxoqeYJ/GPmmLx4kh5rhSxL3OK2eLpvNL0GLmdz2PJ4+fwo+OmOFsQO2eKJelRIdM+vnK7WBopr58nDPSZzhsgX3um5C/rFbDkc7P40UHytbOn6+cJmdMxJfHy2rg8P/9pnxi5vyjLR55LlBEh9+dlBMrtkuwo+dws9EBOz42Q72EaOzhiioEf8Aeh6MMAMcAOOABb9DMMgCzesvEzCnwXi5aLBan8LAYTvWU8BkvIsbZk2NnY2QAwfmcnj8T7exN3EaITpm0iNL4reuaR6mlbsiYAzeg50iBO2wwPAqAYD0BTLkcqzp60jV8ngEV/CxSBKtAAOsAAmAIrtDIn4A680YqDQTiIBglgIeAAPsgEYrAUrARrQQEoAlvBTlAGKkA1OASOgGOgGZwG58FlcB10gR7wEMjAAHgFhsAIGIUgCA9RIRqkAelCRpAFZAe5QJ6QPxQKRUIJUBKUCgkhKbQSWgcVQcVQGVQJ1UI/Q6eg89BVqBu6D/VBg9A76AuMwBRYFdaGjeFZsAvMhEPgaHgBnAovgXPgfHgzXApXwYfhJvg8fB3ugWXwK3gYAQgZoSN6iBXigvgi4UgikoKIkdVIIVKCVCENSCvSgdxGZMhr5DMGh6FhGBgrjDsmCBOD4WCWYFZjNmHKMIcwTZiLmNuYPswQ5juWitXCWmDdsCxsPDYVuxRbgC3B1mBPYi9he7AD2BEcDkfHmeCccUG4BFwabgVuE24vrhHXhuvG9eOG8Xi8Bt4C74EPx7PxWfgC/G78Yfw5/C38AP4TgUzQJdgRAgiJBCEhj1BCqCOcJdwiPCeMEpWIRkQ3YjiRS1xO3EI8QGwl3iQOEEdJyiQTkgcpmpRGWksqJTWQLpEekd6TyWR9sit5LllAziWXko+Sr5D7yJ8pKhRzii9lPkVK2Uw5SGmj3Ke8p1KpxlRvaiI1i7qZWku9QH1C/aRAU7BWYClwFdYolCs0KdxSeKNIVDRSZCouVMxRLFE8rnhT8bUSUclYyVeJrbRaqVzplFKv0rAyTdlWOVw5U3mTcp3yVeUXKngVYxV/Fa5Kvkq1ygWVfhpCM6D50ji0dbQDtEu0AVWcqokqSzVNtUj1iGqn6pCaipqDWqzaMrVytTNqMjpCN6az6Bn0LfRj9Lv0LzO0ZzBn8GZsnNEw49aMj+oz1b3VeeqF6o3qPepfNBga/hrpGts0mjUea2I0zTXnai7V3Kd5SfP1TNWZ7jM5MwtnHpv5QAvWMteK1FqhVa11Q2tYW0c7UFukvVv7gvZrHbqOt06azg6dszqDujRdT12B7g7dc7ovGWoMJiODUcq4yBjS09IL0pPqVep16o3qm+jH6OfpN+o/NiAZuBikGOwwaDcYMtQ1DDNcaVhv+MCIaORixDfaZdRh9NHYxDjOeINxs/ELE3UTlkmOSb3JI1OqqZfpEtMq0ztmODMXs3SzvWZd5rC5oznfvNz8pgVs4WQhsNhr0W2JtXS1FFpWWfZaUayYVtlW9VZ91nTrUOs862brN7MMZyXO2jarY9Z3G0ebDJsDNg9tVWyDbfNsW23f2ZnbcezK7e7YU+0D7NfYt9i/dbBw4Dnsc7jnSHMMc9zg2O74zcnZSezU4DTobOic5LzHuddF1SXCZZPLFVesq4/rGtfTrp/dnNyy3I65/elu5Z7uXuf+YrbJbN7
[... remainder of the base64-encoded image/png attachment (neural_graphs_general.png) omitted ...]
3W9Ze1JrHN+85bNhx9+OOGalP5WSf7jLXixlkjx5fynT0jkhVR/PtXPzTbbzBUlvX0iwlJI4h9IM9661Pq87DWWx2nS49XFlS1Vat68uSuqsUKlqsAHKwXk+1yqUBYnmNiRjAK69NJLS6Xiz4K1VTUEDIFyRKCQx2eWyoK8sBcPxg8++EBY9xnSjLbCGEqIB6EJiSid8ZnlqSCN5S/F8rPPPouc8+UiJ3Kw4xW5JHfL1nKbg+6VYGkCZwk47MAQMAQMAUMgUwQeeeSRSFXWI+Ml7jcvCCKw6XIgkXLxdjS5RuQSLqvJyMdu4j7lyWubY00MfBn/6V1iyUarSYX86VKfmpAicu7KK6+MuT5npEBgB227Jt4JnIm/6ydXYBXtIuwtj9T2mMbnlN4VXNFOPvlkVwneZIU0MgQMgYqBQD6Nz8H1j1MZn/0alD5+Mt4T8e8NMoGT8TzYTrw6nE9nfPbvI7KlRwuqwbh61kHONQXbi7VWdK7bT4e/CZzpoGVlDQFDwBAwBGIiwIv3rrvuctdw9cFFNXrba6+93PVHH320BA8SHwTjHkkSRIIFiDhQ/4L3lVj6wxPtXnvttZHlO0455RR/SbyGGasqySMSEUklfDIeeASFTiwD1113naveoEGDCJtgbCnadG/FDWqafR+o5CdD7CdavoX0+n7pFqyMa9asoYqb3JCOH8L1d5dddnH7Yf0jWZJfBgWLANhq9t7IplkYw2rK+BgChkAZIpBv4zMhED6G8aGHHko4HgKTZsx2aBHGwHjP/Xgint6Hang3V5Ke+TGWTxKfeQqO7ZmMzyzNAhG+4RPkccw7zCdaI8yiXr16nA6FcBX+5z//KWPHjnX8UA4yVoMdhEDuQy3ciXz8V6Y5dK0xQ8AQMAQMgZwjoO8aggiLynJZFJ/Knnb1ZR/zHnWtTNcvypAWP7gsigp7RSqQRpYD8fcQ5OXTy3ONZUZIQa+TlgjPgQMHlmiX9PmeD/zVHctdj7UsChdIpR8sr2tsFvklXTj//vvvu+VB/HIr8GQJlWAZylFPhUTXFp++PNdYuoRlRlQIdtf5F70sCucmTJgQ6YtaG107QT5q0aWYI49LkCcXWDbF38/ChQuLC8f5r8qBSFlfJ/qTpQXKkmxZlLJE29oqCwTKa1mUfByfhwwZEhlzVMEWgd/3NXopKY3FjJRn3Gf8V0HLnWMsXrp0aZEm04mUYdxkaa7guMmY9u9//zvSVqbjc3AZLMb0ww47rIg+wJ9PDQeJtBHG+Mx478dj7t235c/xbipr8m2nuiwKwbNGhoAhYAgYAhUIAf8iKEuBkxcu7fJyj0cIfL5vGmdTQuBkcuCv8YnQxvpqQfIv7qCQSVmONdFNEetXBonJRHBSo5ZKd5mJiW8rWhBD0PMCoC/DpyadiEwipk2bFpnocI2X/0UXXeQmO75vQUGZddKiJz2a7CHSVSYs8Bk5cmTkHDvUYyIW7AeTsOjJhcdFLccl6gcFTvYTEfcXbCfWvl9XNRGfMK+ZwBkmmsYrHxAoL4EzH8dnlI6Mm4w1jP+e/NqYCJPRxBiHwi44PjHmsj6xhiC44ppHoIRAxnXNKRBZc5O6Qcp0fFavllJ9QWmqni9B9pF1krMZn1esWOEE5WhBE2Wnxq2WaK+sDvwzSFXgrETHtJKRIWAIGAKGQAVBwK9RqQKnDB06NG/vavz48dKnTx/nWkUyCLL64cLZqlWrUunmuYkBAwa4hDlqyXOZYXFpwm2pSZMmCe+R2Mm5c+e6tdpSjenx67exBifxqB5T3xCvTnjyiVuWv46rF+nwycwbTJlPOfqLKxRJkNLJUIgrK3VJJlSnTh3fhQr/icswcVI6oSnhjlzhb9xusMIiwPfZu8Lb+Fz8mBnzCUNIJ+aR8myM/cEwB//FYRwm5IFr3s2WaytXrnSJh1TA9UXdZzbjMy669CXee6tEQyEc8N7hncZ7hLU/y4v8Oy/V8fnv/L3l1WNr1xAwBAwBQ8AQUAQQ7NhSIQS26ElDvHq8lH0innhlos8TD9OxY8fo05FjXrbB+B9/gURFpKyPJspnmlWWZBCxeEa3YceGgCFgCOQKgVyNzygN0413RIgMCpLR98w4HOv9wLqcsc5nMz43bNhQ2MqKUsnaXlZ9SacdSxqUDlpW1hAwBAwBQ8AQMAQMAUPAEDAEDAFDIGUETOBMGSoraAgYAoaAIWAIGAKGgCFgCBgChoAhkA4C5lKbDlpW1hAwBAwBQyA0BEjlTmr3VNcPu+2229xyJYlcXUPrnDEyBAwBQ2ADRsDG5w344efg1k3gzAGoxtIQMAQMAUMgOQIkwOndu3fygutLbL755imXtYKGgCFgCBgCmSNg43Pm2FnN0giYS21pTOyMIWAIGAKGgCFgCBgChoAhYAgYAoZACAiYwBkCiMbCEDAEDAFDwBAwBAwBQ8AQMAQMAUOgNAImcJbGxM4YAoaAIWAIGAKGgCFgCBgChoAhYAiEgIAJnCGAaCwMAUPAEDAEDAFDwBAwBAwBQ8AQMARKI2ACZ2lM7IwhYAgYAoaAIWAIGAKGgCFgCBgChkAICJjAGQKIxsIQMAQMAUPAEDAEDAFDwBAwBAwBQ6A0AiZwlsbEzhgChoAhYAgYAoaAIWAIGAKGgCFgCISAgAmcIYBoLAwBQ8AQMAQMAUPAEDAEDAFDwBAwBEojYAJnaUzsjCFgCBgChoAhYAgYAoaAIWAIGAKGQAgImMAZAojGwhAwBAwBQ8AQMAQMAUPAEDAEDAFDoDQCJnCWxsTOGAKGgCFgCBgChoAhYAgYAoaAIWAIhICACZwhgGgsDAFDwBAwBAwBQ8AQMAQMAUPAEDAESiNgAmdpTOyMIWAIGAKGgCFgCBgChoAhYAgYAoZACAiYwBkCiMbCEDAEDAFDwBAwBAwBQ8AQMAQMAUOgNAImcJbGxM4YAoaAIWAIGAKGgCFgCBgChoAhYAiEgIAJnCGAaCwMAUPAEDAEDAFDwBAwBAwBQ8AQMARKI2ACZ2lM7IwhYAgYAoaAIWAIGAKGgCFgCBgChkAICJjAGQKIxsIQMAQMAUPAEDAEDAFDwBAwBAwBQ6A0AiZwlsbEzhgChoAhYAgYAoaAIWAIGAKGgCFgCISAgAmcIYBoLAwBQ8AQMAQMAUPAEDAEDAFDwBAwBEojULX0KTtjCBgChoAhYAgkR+Dnn3+Wb7/9Vr777jv5atIk+U73q1SpIlt07y7devSQDh06SPv27aVRo0bJmVkJQ8AQMAQMAUPAEKiQCJjAWSEfq92UIWAIGAK5Q2DFihVy4YUXyn333ecaqV+pkjQrKpKW65t87p135M7KlWXZX3+5M5dccolce+21ueuQcTYEDAFDwBAwBAyBvEXABM68fTTWMUPAEDAE8g+B8ePHy6H77SfVli+X07V7m+nWSIXNUqTC5jw9OUW36667Tl597jl5adQoZ/UsVdZOGAKGgCFgCBgChkCFRcBiOCvso7UbMwQMAUMgXATOOuss6dOnj3RetEguWLtWuiv7RM6yzfT6HroN0e0Xdbvt2LGj3HnnnXpkZAgYAoaA
IWAIGAIbCgJm4dxQnrTdpyFgCBgCWSBw9tlny1133SUnK49eafLpouWv0e1x3eDTt29f2XLLLdPkEr/4pLmPyJJVc+IXWH+lQa3W0qvlwLjlZi0aI7MWj4173V8Iiw/8dms/1LMt9bl41WyZPPfRUudjnQiLT8+WJ0jDWm1iNeHOvTtzeNxrwQvJ+ATL2r4hYAgYAoZAxUbABM6K/Xzt7gwBQ8AQyBqBUeoKi2VyL+WUrrAZbPw4Pfhdt75qJV2sLrlh0eSfH0lJUGzXsG9CgXPm4jGSikAVFh/uP5mg+M7MYRRLSmHx4d4SCZyp9icZn6Q3ZAUMAUPAEDAEKgwC5lJbYR6l3YghYAgYArlB4IyTT5b2yvqAENifpDzWadKhc049NQRuxsIQMAQMAUPAEDAE8h0Bs3Dm+xOy/hkChoAhUI4I4AK7aP58F4cZRjd46Zyg2x0PPCC9dtpJjjsOu2d2tF+X2+WPtUuSMqlZrUHCMrjbtm/YL2EZLobFJ1lDLeptJYN6jU5WLOn1dPhQNhGl2p9YfLy7cs2q9aVl3cTtJOqDXTMEDAFDwBAoLARM4Cys52W9NQQMAUOgTBF4+7XXZDdNEOSXPAmj8X8ok611G/Xqq6EInGEJL7iSJnInTfXew+JTq2oDadeoX6rNxi0XFh8ayKY/Iz7p5/qIu+2gbca4fftnCBgChoAhUPERyFjgHDNmjAwfnlrygIoPo91hoSNw/vnny7777lvot2H9NwRCRWDVqlXy3Zw5sluoXIuZsZzKJ7rEipEhYAgYAoaAIWAIVGwEMhY4b731VkHoNDIEKgICs2fPNoGzIjxIu4dQEZg1a5b8qetpNg+VazGzNvox8uefZZEusdKoUaLFVRI3PmLSLq4AWVETZaBNzMWuGgKGgCFgCBgChkCuEMg4adDyEDMM5urmjK8hkCoCCJxGhoAhUBIBBM5alStL05KnQzkiCRH02WefFe9k+N8tZaLLmbCEiJEhYAgYAoaAIWAI5B8CGQuc/lb69esnRUVFthkGBfkd4PtrZAgYArERmD59ujSvVCn2xSzP1tD6bWrWlEsuuUQWL16cJTerbggYAoaAIWAIGAL5ikDGLrX5ekPWL0PAEDAEDIFwEGjatKksV2VarmiBJiNao261Xbt2lVtuuUWOPvroUk2tWbJEFk6dWuq8P1Hz8+K9PxbOEbd2i78Q9ZmMT7B44+7dpXqDBsFTJfZ/GTu2xHG8g7D41GndWuq2aROvGVk4ZYqsWbo07nV/IRmf5erpsUJjdpNR9fr1pfFW8bPMxuPjn5X0WJesCbtuCBgCBYxAWGNSqnwyHZOiIU7GJ533SIu+faPZR47T4RPWeyQbPslwidxYgh0TOBOAY5cMAUPAENiQEejSpYss0BjOlQpC7ZCB+En5rfjzT5n03nsyYsQIOeaYY+TNN990gieCrqcFKky9ussu/rDUp8+eO1ceETnq4VLX/YlkfHw5PvcbPVpaJvB+GJngWi749Bw6VHoNGxZkXWL/wyFDJBUhOBmfGY88IpNTSAbIRGr/BDkc4vHxz0qOnC3Sv8Qt2IEhYAhUIATCGpNS5ZPpmBQNeTI+6bxHBiVQ1qbDJ1/eR/9UT6REithoLKOPK0efsGNDwBAwBAwBQwAE2rcvjrScmwM4vlWeTevVk86dOwtJ6N566y0Xz4m187HHHnMtzlWh5ptHH81B68ayXBH4BhWGkSFgCFREBFBapeIpURHvvSLfE0JyNmQWzmzQs7qGgCFgCFRgBLA0btqsmcydN086hnyfPyi/HluzGmcx7bHHHvLFF1/IRRddJCeccIKMGjVKTm3ZUr5RqxuEljcW3b8+S22PlifEuhw510RdQOPxiBRav0PZRFTWfBK509LPPrffLqvV9TgZJePTeeDAhJZdz79GAndjysTjk8hS7XnbpyFgCBQ2ApPWe2N00nGcsSARJRuTUh3bMh2TovuWjE8675Fo3sHjdPiU5/sIl+bx554b7HrG+yZwZgydVTQEDAFDoOIj0HOHHeQTFf76rlkT2s3+rJw+r1pVLozhKnvjjTfKgAED5IILLpB7Xn45sgZoPBfXVeu7Vat9m4T9wxUoHo+EFWNczDc+ieIpY3Q/7ikmf8kmgHErBy6ExSfA0nYNAUOgwBBgHMh2rMy3sS2s90hYfPhKZIux/1qFxcfzi/40gTMaETs2BAwBQ8AQiCBAFtmdXntNcGxNbEOMVEm684qW2LFXLxmqsYmxqH///vLpp5/KjTvtJPLBB67IDz/8IK1atYpVPNRzrAv67bffynfffec2XHw7derk3Ivr1KkTalsbGjNiSKEwhNoNDTu7X0PAEBCZp942M2bMkG+++cat4dytWzc3Nnfo0EGqVKliEIWMAGN1WOO2CZwhPxxjZwgYAoZARUJgu+22k8uuuEKGDRsm/9Ab65Hlzb2t9Uks++ITTyTl1H/XXWXyeoGTicXNN98sgwYNiltv/vz5Ekw4FLdg1IWZM2fKhRdeKBMnfShzf5znrtZuUFmatakq82//U1Ys+dOd69i5rQzYY1+54447ojjYYSoIJEp8lEp9K2MIGAIbHgKR8fn992XuggUOgMYqXNbXNaJ/1sRzqzWxXa3q1aVTx45y1HHHycUXX7zhgZSjO0bgDGvcNoEzRw/J2BoChoAhUFEQwBL5mrq33q/xHLfqTW2U4Y05QVPrPvXkk5GERKmyOlfjSE477bRIJtt27dq5qru1L7aatW/YT84fcn4k4VCqfF966SU5+pgjZdPNq0rX/X6XPptoMqO2InUa/6Usiv11l88XmT9bZMHs7+Xhx++Tka+/KPfe9YDstddeqTZj5QwBQ8AQMATSRMCNz0ccIR1UuNx59WpprvU31a2OCprCpvSrbnM15GPuV1/Jv/71L3lO3y//p4nntkoSi+8q278yQ6BymbVkDRkChoAhYAgULAKffPaZbNK8uZyndzA5g7u4V+vco9ueu+0mR8VYbzMZyyuvvFLGjx8vv/32m2DtvPvuu12V3dsPE7aJb/0ijz/+uPz73/9Oxipy/cgjD5eDDz5Ytj1qjRx83e/S4wCRtr0QNiNF3E5dXaWl3TYi2x4mctgNa2VdrZ9l7733lrPPPrtkQTsyBAwBQ8AQCAWBw/bf343PB+p6zYNV2OynXLvoFh3YgBDaQ7d9dTtDt581+dzWmpAO4dMofxAwgTN/noX1xBAwBAyBvEbgp19+kdtuuEEe0F7iELsohd5+pWWwQU7VDVfUN995J4VasYvsoAmMEDovvfRSGTx4sOy3334unmexrg921llnuUpXXXWVvPrqq7EZBM42a9ZUnnnmOTn+TpHtjwhcSLLbtI3IMWrm3f5IkTvvvFPOOw8R3MgQMAQMAUMgLASaNm4s4zRZ3bXKsF8aTLfUssN021W3G/RdZeOzApEnZAJnnjwI64YhYAgYAoWAwBBdtuTDDz+UVZtvLldpHM3N2ulndPtYNwTQxbphAX1etxt1U3lOum2/vUvE44VCPZUVXaExpR9//LEsXbrUWTtxbV25cmWE54knnii
/qHAcj8455xyZP3+BDHpMpHmneKUSn9/5nyL7XChy2223yZdffpm4sF01BAwBQ8AQSAmBM089VZapEvHKdeukUUo1ShdSZxQZqJuNz6WxKa8zJnCWF/LWriFgCBgCBYpA7969ZYwKfPc99JAM0MnBKnVxfbZmTblU7+cS3Z7XbK6VNNnQMWqJfPHFF+V/EyYIWQTDpG222UbGjRsnhx9+uEycOFH++OOPCPsVK1bIKaecEjkO7kzQvmBp3XKASP2Ng1fS3++2m645uaPI0QMPTL+y1TAEDAFDwBAogQDj8z0PPCAnFRWVOJ/JwfZaCVfbgRoDalT+CJjAWf7PwHpgCBgChkDBIcASIccff7zcOWKETFAL3/JVq1xc5dVXXy2Lli+Xtz/6SK685ho56KCDcnZvy7Wd//3vf6X4I3y+/vrrco22H00XXXqubKyy74Bw1rKWXU4T+XrqTLnm2qujm7LjKARmLR4rbHOXT4m6YoeGgCFgCIhcePrpsoMC0T0kMA5VPlOmTZNrr8U516g8ETCBszzRt7YNAUPAEKhACLA22iOPPBLaHZGOfZBqutli0QUXXCDLli2Ldcmdu/zyy+Wtt96KXCe284MxE6XvSZFTWe/Uayqyi67UcvllV8jcuXOz5leRGYzs10/YXj2V9B6FRyzPcP7557uEUYceeqhcdtll8v3334d2I2s00yZJr0Zp7Fo6tKsuH7TxxiXN9ZnySqfdYNkj1IpUr169hL/Hr7/+Wh5++OFgtcj+R6qgAs/PP/88cs7vfKYJy2699VYZOHCg7K+JZC5St/5PPvnEX7bPPEPAj9npLqdBRtq5s2bJ4SHeDy65h+n7g+/WwoULQ+S8YbBaqJnpX91lF7exnw1Vzaay1TUEDAFDwBAwBDwCTJS/++47eeqpp+ToDDLRej6pfDI5uf/++6WypsuvrmuwMcGORf/85z9djGWjRo1c35q1qSJtehSn049VPpNz7bcTefdukU8//VRatmyZCYsNok4tL0tUXVlw9ztp0iTBjTuaGmtyk7ASkyBskfSqR48eaS25s0DXJpw3b16JrmXKqwSTNA6IocbjoCiOcghWCOzEV7do0UIGDFCf9gBN0cksVijufcstSf2iixLpbxqPCTAJEoqjm266SUaPHi39VIFhVDEQmKwJ4TbV71CtkG9n8/X8UIiSeM4odQRWL1kic8eMcRXYz4bMwpkNelbXEDAEDAFDwCHw888/Cy906OabSSWUW8JVd/bs2XLxzQfJgMO6S9sOm0UarFGjRmSfDLaDBqkJUmnqtI+kYatwhU34EgtK9tqPP/mIQ6MKiIBfbgc3bVy2V6kL+X//+1/BshcWsZQDFsAR6qaeLYXJK9u+RNcn7jpRUi9f/pJLLnHCZt26deW5555zQjWCLQInicKIJTeqOAhMVMEmF+o6bP91VTE5Y8aMigNWAd6JWTgL8KFZlw0BQ8AQyDcEsDZU0ay1f+pi3LjAjRw50rm/5bKfrVu3lj+7vSBduokMHj5Utq4/2GXQJYsusZ1YHBEOnn/+eWcR+eqrL6RJ+9z0aOOOIhM+fj83zI1ruSMweXLx6rNnnnmmeIVGLGFznWbWxLqIMqRp06byj3/8Qxo0aBDpP27XKGe6du3qLIIkSampCbd69erl6rDGLFZTT/ye4IfnAHHTuM9i0U9EKFkoH+Q1Z86cUlZQz6Nq1apu3UJ//Ouvv7rf8F9//SXt2rWTzTUjdTQhcPM7x7KarpURSyieB2+88YbzUIjmzTGZn3GjhaZOnSpt27Z1+/zbd9993RY5YTsVAoHJGms5MEd30ky/y9Mtm3iO0E2NrQmcqeFkpQwBQ8AQMAQSIDBGtdO4tzJBrlSpkpssEm9VltSkSRM54IAD3Ea7TIoRPtne0fU/p385W3bdJTc9aqVZLiY9Ycuj5Abd8ue60047yQsvvCBjx46Nq0j59ttv5ZhjjikRX4h1DtfvI4/UhVuVEKJuueUWufjii906gZzDjfSMM86Qk08+mUO3f/fdd+vSPfNle11SaJbGtXmCH9ZW4pfjEZmho3mxdi2u7rGoT58+8sEHHzh3WFxV6VuQWO/2Ic1Ize8LQgDeZ5995KeffgoWS3mfeyC2GhwuvPDCmPV8LDj3GhQ2Yxa2kwWPAO7Wi9V6nQsLJ+Bsotu0r74qeJwK+QbMpbaQn5713RAwBAyBPEHgzTfflLVr17reEMfFxPy9994r197VqlVLdtttNxk6dKi8/fbbUrVaFVm3JpddqpRL5sa7HBHw1kwUGiQOio6ZxJKO5Y1kNoMHD5Z3331Xhg8f7np81FFHlUqGw6L05557rhO87rzzTtl9993l3nvvLXGHWEhbtWolCItYA6kDIaRh9YtHsXjhVv7YY49Ftgd06QlPCMTQk08+6YRNYigffPBBt6QRrqu4sGLZhRYtWiQ77rijEza5P9bDTdcFmHr0MVHyHxIMQbRvVHERIAzj9ttvdwmhcn2XNjrnGuHE/M3CmRgfu2oIGAKGgCGQBAFiY3ATDFK1atXkP//5j/Tv3z94ulz3O3RuJUt+LZ7Iht2RH3T+333rLcJma/zyBIHDDjtMXnvtNUF4xErJRrwlmVMhLIBMnhEivSso7q/Nmzd3McTEe/pkOJQ/4YQTIuU4hnbeeefincB/XMPxHIAQvrDaDxs2zAl63bvHXjwCITWaV/QxWTsh1qTFvRfPBKysEMm/fPIrrJubbLKJPPvss/J///d/zjUdl1ju08e1kkwJF3qWIkqFcOEFO1x1sfzimhtNX3zxhTuF23yQsDIHs/j6xGHBMraf3wjgcYIChWfp4yoROr/S78FctXJ2zkH3eTvtoO7tRuWHgAmc5Ye9tWwIGAKGQIVAgPhNBExv4eSm2GcSisUnVnbPVG58ucbBrdDYM6hF376pVElYZotu/5AJM3IjcP72rcghJ2Tfx4Q3YBfLFQHcSHFvxSJ55ZVXujhEYjURAP0yHbimBl3JcYuFiKkMEsuqpEJLly6V2267zU3OvdWPetkswYP7OxlhUQZ5yyX3gSCJu+tpp+nisgFCyIUo49e9PfjggwMl0t9FiMVt9pBDDnF9wK03SGSVxmX3999/D552gjbWV0/33XdfRCD35+yzfBH4Rb1boDqqLKjbpo2sXr3aCZgImWQXZ3kS4pbxCiAemeRP55xzjrymFva5+r7onIPuz9P8Al1UsWJUfgiYwFl+2FvLhoAhYAhUCAQQOLGQRBPJVZic48qXCc3QCenk9W6J8dbiTIfvlptvJ2+884JWWZdOtaRll/4mMn+2aOKX0stmJK1sBQoKAeIYcQnF4klCIKyZuGwjjEGbbrppJNaRYyyclCMGNEgIXMmIBER77rmnE2axjiLkIoRh1SOhTyaESyzuwQiWCHzeeurjMevXr+/6HOSNFZK4bPo8ffp0dymM5SUQWk866STnvkuCryB16dLFuSFjNW7f/u9MX7gVs3EuGNsarGv75YsAa+0u1i4sUxfzzzW8gnAL3g/B5asQNiGWvsETBtpe13scm4P1VXV4luXafufOuR
BlXdftXwoIVAiBE5N8s2bNpGHDhinccrhF0K4Ry8Gacx06dCjBnBcC2s3oBZlLFCrDAzLP0RdeHPlEaGq9+04+9cv6YggYAqkhQEKeWBNgNNuPP/64sLxBrEyXqXEPrxRj9LzZ62S2zm3b9AiP78yJxbxI/mK0YSCwxRZbOCHy/fffd660CEBYDknmE9ZyHcRGYjlFKMOdFcIdEYEz0/c41kvmLCQQ2myzzSIPCzdciN+pj+mMXAzssIYmltBly5aFMufCeovCKmi9pTm8InDjvfHGGy2OM4B/oeziBH6PxvAvDVioo9dK5juMpd27mvfQBFlP1asnq/S7VSvEG522nlenTp1C5Gqs0kWgODAg3Vp5VJ7kFLivIPiVBzFYot0kO100HXjggU5TGAzOjy4T5jHa0HiEfzwvimOPPTZekTI/z7NjIoom08gQMAQKEwGWi2AZBlxqowm3OLTKWDnzgYhH23WPnWXs3x55WXdrmXpMjh5RycW2meIsMZztLjpB2LY65dzEBfPwKgpbvueecAtE2ISwFnohEwEpaO3HtTzaLdTzSPbprYnEjXr68ccf3e6KFSv8KeF3BkW77UYKrN8hzpT1LFkHM8iTy8RKorhHeTRx4noNyvp6wfv2ls177rln/dXij1gKpxIF4hyAHcmKPDEvgEhyxDWEeJImBec34Bm8f1/XPvMHgfralaGaYCseEceLUuFf//pXpAhrK7fW98WzkTPZ7yxSFs+pYEv25eByQ9lz3jA44BLdU2UcNvazoYIXOImXwOUC7Z8fqLIBJMy6rHsFRWezC7MNePFC40fLSydeW95dJl9cUHh5DBkyRK6//vqw4TB+hoAhUIYIMCHkd4xijWQeTBTRWDMBZVLOpDl6clqG3SvV1H13Pyi/fSfy5m2lLmV0YrTqOnfsv3XCZSoyYlwBK+1+wyPCtt1p/y64u8ONk3fs8ccfL6eeempkqQ6ENxQNnGcJk1deecUdk1CH3wWW0HhLfyQDwS+lglKbOQ48vaCI54B3S8TDCiLG1AvB0bzxBMNSCuF5wO/Ub+edd55zrcXFFuI+yLh71113uXvlvr///nt3DSUxdPnll7u+sHwLfIKJfFyBNP7RHu7CQULYfPTRR92p6667zq0pSjvE+uGiHG+uE+Rh++WLwE5qUSS+ORYxByRhVTTdr5b3CXpSc7CFQs8rl101RjTRMkKhNFRBmSBk9tJnyJatwFm10DHy1kMEKRZQ9lrGfLgvMtp99NFH7iWQy/4Q0O/TpcdrhxcW2qToOJJ45XN5/itdC4lFn32ShVy2ZbwNAUMgtwiwRESQiGVj7MvU5S/IK5X9dg2LE/U0rNUmleIu9IGJztlnny076Dy9/sYpVYtZ6Kt3RWZ8IDL21zdiXreTFQcBXE5xmUbQ84QgybsVwmJDUhQmtlgSr7rqKneeDLA9e/Z0+8F/PnYyeC56n/kMgh0xbqeccoqz+JHdFSskLrFTpkxxwiHxpMw1sF5OmjQp5nveC4q0gVAcTcSikgWX3y6/DTLOsiH4IVRzfxAeCwi1ZNkl8y5bu3btXOwqiu1U7itWGfoHfsFxA4sXy7+QPRdXYowLEH3iGplyfb/cBfuXVwj8pdbqnzR7Oc+oiibtQdEBcYzVervttivVX8Ieblu/FuwdGcYpe6ZjdIfI4F81G65R+SNQSa2Cxf4LafZlFw3uRbPdr18/53+fZvVQii9YsEBYp8oTsQnR61hxDXcQbpMYT/bRuFOPxYSjBys0hrhrkEGLAGdcVJYsWeIG2Vgxogy+DIIMlH69KDQ3xDd4qqc+6dHtcG2lpn/2AyhxE7QZTdwjZRigKUNAf5BoCxcbBnyIOAgfM+rdbPz9c502NtpoI3ZL0G+//ebulaQAuNYEB30KelzAgGtkzkNwxAUHHBlMUqHx48dLdDY6Xh5BvFLhE1YZ/z2GX4Y/hbC6YnwMgbgI3D+pv9SoWk9qVqlX/Kn77th/Rp1vvUlHWaPJHS+/dFhkQhyXecgXXn75ZSEDJ2NTtjRJtaphJg0K9mfAXnvKW2++LUfeJNJqy+CV1PbfVi/hKa8Vl2XitPfee7utV69eqTGwUkkR8ONzec4zojuJZY0YRgTQWG7klMfriGWCCGMhcVa2hFKZeUnQJfCXX36RBg0aCGvNesKVF1fT6HmCv57OJ3MP+Pn5RKy6zI34nZNIKQxiXsFcqXbt2qXYgSmGBe45UZ9KVcyzE8yb+V5Dw3R88wqLXHWTHBkkZBo/frIq+Ser9X0T6dt3W7USb+EsxbHmndn2ZYTOEWcrk5c0YdbvGmqBhRqPNr5TzB+7desmftmbeG3tobLFJFUynKXfr5bxCiU4/4he+2j9dfD24zMKIKNwEPByQqrjc7HKKpy2y5yL9/vHHQPtHse33HJLKYHKC17E77CAcZCIwUSb5zVuaBJxTyW4mIE06IKKoITLSXRyoCA/9lnw2QufHDMBY7FoT59//rlzQaHPQUJLSawl7jloFtEAUTZIpBDHPQ1BD0LTyQ/JU/DHRDY6Bm9//5QJrhHGMetf4bbjBV/OIQCSNMC783DO44Jm9JlnnnHaVc5DtPniiy+mlAHMu8Fcc801DmPu2cgQMAQSI1CnejOZ+usziQsFrg5eX/T3oqvkyjF3S00EUxVK3SeCqxdU/WeSa9WqlFZSBZorsUtWTiaqJEwLKgRLFMqDgzdHveWsM7gobr2fJik5WKRBCjOb6WNFxj2kbonLqqui8WWnmETh+PTTT7vJIwo4P7kJjs15cMvWhRAQ4N3r37/x2CGItlFXtLAIoTIoWMIXYTaaUPyGIWzCNxUhEuEvTErUdzBFwW+UGgLkNbn22nvUIPGFGgQ20zG5i1bsrttCzRr+gJ6brvPe1Tp/20ouu2yIy7qcGufkpcZpkad066ffj2d0iRR+Lzxb5uAo9lOJ6X9bBfPHNGnWCWpI2gNeujXSLRm9pwWY5W+k89j3dL6KcojxGa8WPBJwcffjc98QltpK1h+7/jcCBStw8qX1cUFoT/DtR2hCUIsnxCBsIgiiJcTShjCJAIamMNolFV78SAYOHChY/4hPwKWjY8eOTnOZKDkE/JH40WRFE/0jNgKC/x577OEsnQippI5GiwchaLIh/OEK++WXX7qYBRbKRTgmextEP7hfXGkg3Ey8dhDNKhoI+oLl0wt7rqD+Q+D1CQAQGlmkGqvle5pZjEkYAqtfCNrX8YLz7rvv7iy/uMXCm9gQ4mmTET92EiwhtOO6syETk3Lcs1AaYIVGa8tASKr4VC3GyfDjmYAzygss7akQ30X6QJA9vytPP/zwgxu0Tz755DJJ9ER/+e77dd98P4KfvLj4TvoMi8FruLTVqVPH/caD57EUsBYYGlZ+VwhFWKVw845l/Q/WLa/9Lk32TkvgjPSz0p+ycs18t0XOZbBTpVK1khbVoNBapX6Ja4sqrXEtfDzjFdmyencn3HoBt3qV0paLDLoTWhWUanzHBp19tDx13qfSpuc6adZBx1Wdm7Xc/O9m5mqaQ7afdZuhs6l9Duwvr730v0gBxk6Un
YzZTG7YiG3j++cnN3zmswAeuRnbMQQMgYJFAAv3IYdcKGPHjlcXVty6e6qwuWmp+9Hph57/Rl2WX3Yu0wcf/E9d61U1aVkQcfvMGRA299ftCl1+xytnmPeynBD9Y06aCh2vvPrstpucpnPcG3Rs7aqdbqUVO+rm74jFVWYFtq91/0Sdvz6oLuee/Lq4zHn9+ExyL/oWHJ/93NnXs89wEShYgdOnIucLQ1ziMccc47TLxDfEEziZZJI5FsJ6SWwEgiZfPNxx0Ux7wpr59ttvRyag06ZNcz8ShDYmsrFcd31dJjCk+SZuI7i2FJqWE0880RUjJoK++gkuriS47+LOCiHoYZ0866yznBss/aVNrLEIl7jr4P7avXt3l0zAC5xotfwP3DHSf/QFq21QqGZggBfEhB2rpXf9wfoINtRB8Ay6EiMAIwBwjxBCPD9mhHGSCgQxdAWi/iWzDkcVr7CHKEzA1j83cOX7gUIBPMMSOInL4XuPJi9odU8ELO5b9CU6hTl9Y1LN97Iskj3RB1y2EhGJNPjuotyhX0FCYMYKgFLJUyyLPtfwXHj++efLLTzA9y/eZ+emf3sxxCuTy/N/FmmmzbUL3ZasnXXF8qY89sEp0kbdeoNUuVLVktbVJJbVeStVwltP7NesWizc1qhSx5/O+pPlLN59faKbDH348bsydeQX8t59S6VajcrSqmtNmfPVH7JuzV/StlNz2abn9nLxQ/s75USshkmWxMbYSVZTP7k5/fTThd8VEy0/wcGtzMgQMAQMgbAQQHmNEUPkTN0m6JbMM6WTlrlIt546TzhF3ZVbaAzw+0m9+LRCKcL4gLCJ2+xZejXW6EbSq3SJ8fkddQlGWP1ADT//U6PIMyq01lVrfhO1en+/fj3PrTSsrLfKArdpAq1iDEq3hDekT1A1c+bMyPjMfBzjjB+b+QzTQ6F0TzbMMyVnaAWEgV+TCqsEk3OEJnzhsUTGW9cR65onJqd8gRH6ECIRLoPWHAQjLwxSh/hJsqghmOK6m0jg9G1Ef+KOSlsIF9QP8sfdIJhYgGOf2Q6rJ3GaWBU9kZgjlYWjffnoT6ySPmkPgqgXNinHBJ1zTPjHjRtXwh0Yy5cXNimLi4QXlkgYkEzgpI6RyMcff+yETRYJRyHA9w3LL0kggs8iW6yuvvpqN4j6mJFs+JGFkXgPb+XOhleYdVF2oCRJFgvD99Ovk3jcccfJRRdd5Fy0OI+7zdZbbx1mt0LlVbtaE2nfaBeZuWh0qHxzwaxqdZEaashcubg097+K1qnQushtpa+WPtPwV5GG60/f8mHXSIFKUtlZVTeq1lAO7jpCOjb+e2yMFEpzp/j7U5wABms+ygmXlOW67WWrrbZKO24M12IUjGx4M/BuQgAl8yffPQROP8FJVeOf5i1ZcUPAENhAEGBeWrzs3X/1jo9I8653VVfXWZqb4xTnxcfcD++MVAlDB0o1DDp46r2k3lphkxuf1yfowgOR5bhQ6hE/j5Ivnf7SNwRZDDps3K9XDtLOmWeeKdtuu60bn1HUs2+UPQIFKXCiQfGZ4tBMQFj6SJyDmyxZ00jznYyY2GMdxXLjU34nquPdT/lyYpFMFG8Qiw9CHrSbuggE4ypjleUc5n80QmitoinaPTb6erJjtDsQwmJ0XASCMBMsrMjBGNZ4PLEigUkya1S8+hvieR8zi2XeW31x12aLJlxQsJSzxAQLdTO4ktAKwlLKwMtzZAClHN9llBdYVEj2xG/DxyhTB5dSJtKUow4Dtg/+5nosYokfvnO4nvK7weLOhDweoR307oOUxdUQJQnnSGkfK/YHF24EcWJ10s2mPGzYMJeIYeedd47XJbn44ovdNV4wwXTsKEmI5c53wq22EAROcKytUmIsgTNdjBcfp0nfdIumIvlLhczdnLC5UbVUInuiOSQ+xkWbLSzlCkpRXMp8OAXrHPoJDpZ4fhde+OQz3clT4rvJn6u3d6/kOlOr6yYy6Omf8qdj1hNDoIAR4N1ZLGwO0btIV9gM3vgDejBH378H6/v97eCFmPvMP1giCAMQnn94xkGD9HwuiRwrbGER86cj1P2XDSKsjPEZ7ynmFsxnguNzsvlSWP3KBz4Lda44fr2XWG+dJzVW2SBTKkiB84knnnD3yxfOWyw4gfCJq9+DDz6YksBJHT8pRlOSjIKupSTESFfg9EIeVq1k9Le2qrgk62whbBC3inDHDz0bov9QrMQDnPfWV+JXk1Euspwla7PQryMEQgxsuK56ATL6vrCKDNQ44qCCge89ShUscsR+YnHu37+/SzmOtQ/C1ZnvDN8VCBds4n2JXR4wYEDkPNfgR1IozscjXiT+d0f8MEJjIq0fShIUK1htEaq9NR3+DO7RSakY2INJqiiTDvGbwpUepU4wk6PngbDOMgJQMkuor5Nvn51V4Hz9mwvzrVsx+1NbZcAwBM6YzPXkXh2vk35t/14wPF65fD2PkocNLxsUP1749BOe4OSmInmN1Pp8/ROpujJfH431yxAoOAR23RXDSxfdwlCcstxOU/XEuE8GDz4tLhYsv4NXIHMTcpMwZlUUYj7FdvPNN7s8D358Zv7N3Jj1brlfrJ+FnDE5lee1WsOa5o4Z44qynw1VzqJMCCQAAEAASURBVKZyedRF0PLurFiJsM75jSQNEJNOrD6pEK6qEMJcMiK5kKd4gpq/HuvTt5FMuEVIKNZWibA2FZYqBFCE6S5dGFTiU6qCKO5eUPCePFcsUm+99ZY79H321+wzHARw7yZRE4IYgiOCZfSzY6FuP4jjGosQh0s333viPFlWxxOCK8IncYisAYfwSOxmtKUQ4RIlC259DKK4T8OPNdX8IuKeZ/ATj4HBgwdHTvH9eeyxx0ps3hoEL4RN+GHR4R6pSzIiJtgQ8atYPSHcthE2ETJxcae8X6DcFUjhH78PXnysVReNI9URfCGfNMwdFNi/jet0lY3rxIqMyb8bcRbOReH3q1GtdnJyz7dLCJvvzhwubLMWjQm/wTLgiECJCxeTNsZ+lC/8Rvmt4LWDQgklCVbRdAn3NiNDwBCouAiQV2HKlP/pDd4Q0k2yxM2D6mp6ulNQx2I6QrPHMi4xP0Tw9POUWGUL/RxzNUIgyBOBAQY5gzkGYzbzIMIhyANDAsJ0idwRGxIVnMDJ5NS7I/IyRjjym7dW8gC9NSbRw8TqAT8olitjdF0m6BBJeXw2K5+oJBV3UuJAISYBCAfxiB+wJ+JG07EgkngoFfJacyY40XWCiY78+p6p8LQyqSOAi+uECRM0m9whTkHCgE1in6Ay4oorrnAMGdwvu+wyJ8ShbEE4I+4Ql+cgESMMP+Ka+V6SOTM6zpdU9whfDJYIXyQVIkEWwloit3KEYqyannCJJQ7Sb7SF0IulkaV9IARffqsItWSTpQwu4mgNIay0EAulQwiwWHNx203XxRVXWpQzCNnEk0ST9y7AhThIKHNwCSLzLlv0sknBsvmwj1ttIVBYLrXBe91y48Pk7O0nlYrXfGfmMGGbuXhMsHhB7uNKi6cOExF+k8R3E3+NIolkF7h2kVCOzNMk
fktGvLOw/KfyfkrGy66XLQI8X8Z0vJEY/+ONiXxHuO7nMsFekosBxWU8uvbaa90kOvo6ykGUoEFiDEfxwXwrHqE0SXQ9Xj07nzkC9977mFYepht5YcOiE9Xrand59NFHSzBE0OJ9ieKbuSlL/lV0C18QAOb+zFFI9kioE+MwsfjMy5gfsU/uFX6TqRBLaRHis6FQwQmcPlkQL18mkdEbmgYIt9rVq1eXeI7EkHlC4MMagsCFZcVbZ/x1LJ/BLJ0Mvt46E8x66Sf0WGaIjUtEBFTTFm3SdlDopD2Wd4GC8W0kr/DEWpfeNRGffU/wZIOYYHgLT6IJCdY1MIRYMsPfKy8L3CshhIcwks04ZvavFAIs+8HAheUSbSECJK62/pmP1fWrIL7zWDT9RpwjFBQQiev0saDuYoJ/CJwIpQhfxCJ4N9xY1u4EbCKXSIjCIMz3mthq//3131WWZvF955PfCoSig0Hbl4uXWS7SUJIdYi1wsWdCHr2otF8ShvaChBX2gQcecOMFY4a3ugbL5NP+hipw7tXxejmm+7NSS5MEbUjktecsV4UGHUURGdN9srZDDz3UKXaCiqogPrwzmBShpU+0vFCwju3nBwJ4eeC5gSKdeHm8TEhuGCTmHCggERyjlcMoGciWjCIvFpGHAkEVJV00MRfxHmP+Gu8hhIzoeZW/zieeUcxzeCcY5R4BLG4zZ+IpFL7ny5o1nVWJ/LenIIYQlMHMhVEseIV47u8yP1tg7oRrLb8T5mLMy1AWMm/DHZffLd5eeKwwN4omPNT4PRNeRM6N6DlLdPmKcFxQAieaPm+5xLISi5jQQjzgaHcisrxiCcHSwvIjfqAlgUh0PCbWGtwF4OezCcIXl0S/nAjHfpKM0IB5Ha10PCK2jMktRKIi+kBf6BMJKhD8mJzTHu1AJO8hrocyWK88oRnxa3byxSdODuLFg+CB4JLILZE6XmOK0IP1lXZo28e6odFMx7rq+2afqSPAc8D9lARRCG18fxjQUQD47xKxl3y3/MZ3BoVFMH7ZW6yTtYxlFLdsLChY9XHHYXCEEikoEvHFosn3H80eE2RPXsGD4sL3nU80gfSfSTPJiKA999wzEjfs66f7SRwsEzSIiXhwkPf4RAuUxGWjoPHr2qbbZlmXb9eon9Su3rSsm027vbBiOHGhPaXnO+pCW5zwKe2OVKAKQe05YwPxRHhK8D4ixAMPCSxafuJCWAmx2yhV+GSc8QrZCgRLhbwVPC0Yo7EkQf5djbI7aEHkPN8FsnQzzgaJiS6EoBis48ugxID80lz+PIpw5j+xLKa+TLxPxnWEEfpulHsEpk6dur6RLXLQWGdVCk9279ENyYU2UyB96AOCJwIoBiq8KAkfQuGNFxuhTD4Rp/eYZK7HmM18rqL/bgoqaVBwYDzooINifi+I58R6RxwnglPQcsmAHHRDRKhDO8GLOJqwGOJu513sOEaTiAuiT6hDnXPOOce9zJl0M8H1lkKu+bUUgxlCEeoQNNFU47rKwO6JtNJYTJk440pDWSbITIZpHz9yBnRcIKmHphvLFoSgiZWUwd5/oYOZcBFsoGBfsHDyRYcnAo+vB34MMDvuuKOrwz9f339GLuiOdysO8g5ej7cfi1e8shX9PNihDWPAweJIMD7fVwREvqNkbo1FQUt3rOvR57yyBEukjxNGY833KZPnwXcYl1i+M2i/g4QFlbgHrvfu3Tt4KbLvk1Jlal2NMFq/gzCNS62fqPnraP/5DdFfNJBMzguVujTZSybPfSyvu49L7aqlqsRQQ0flKpl1FRfag7ver1bNBpkxqMC1vPacMQNi8sLGuI1VCwUi4z/vkuA7iUzNWCpQfEYrWePBxXsN4ZWNMQklaHRm83h17Xz6CGAdHDJkiFM6ew8j8ObZoVDAE4P3A0oEkrnxTIJeV75FFBIQAinjcPQ8xyvfGPsZh71rJO60EM8dbxuU4akSykSUjvSHiXZwrpQqDyuXOgK8zypVqq1K08S5PVLnGCy5lTsg+SBeErzfN3SrZhCdRPtt2hSHPuBthRcCYzNzczL54kKLRRMlEPM6DEfeeITnGeMsxp5UiTmgX8KL3yqCa3ToUKq8cl2ucq4bCJM/Dw9rBFuipD1o7ijjNXy+D/hVM4jiUohrHbEN0YOwL8t6k7ikMOAiyOF+guk7Ol09Xxi0jGiREdiwRnlC00E/sOQECUGPLxUmdWLcGOyxLvGC8JYYJs4sXYHA4dvnZYP1khcN9+BjQuGNpYZBAdcqXBW5z6AfOS4y9MXHz/n+4GpFLCHCNTypB35BYZOyCLvU9+6Qvj6f3AvXcC9IhyhPvWXLlqVTrcKU9cvk+Bvy7lJ+IugnG9GDD99FcMuEvFab77cn78LrXcJ9fDIvs0TE95dJBYRyJ9oa7oVMrCpBFysGV98WkxziIpj8BJVBmVpb6QsxJtG/OSY+uNxCrIvoJ1vuhP7zWZv9cT5/kq023wmBE8o0U+1enW5Y70JrwmYxkon/B7XnuKijHGUSEhQ2PQeUqIz7QWWnv+Y/yWY9aNCp0m2rDk47z1iE4gi+vJsaNK4j/ffY0Xk1ROcA8DzsMzMEeB8zl2BsDSoBiVFHaYYnCVZIvwQFITC1atUq0RgJphjjvCdYLLfav61j4uYOnoFXsnPMHCRdImaYeUS0h1m6fKx8cgTGj5+gc4GuyQtmVKJY4OS9zNzShM2MQHRKFzyumDsz10f5g5caczEvaAY5p+Jii8HgaDVIdVIvTDwnkWMYn3HpRUZoocrG/dVrzIcBBvmX535BCZxhAIXAyAPxk+pEPLE64VaLBjE48Meqw9qECIvpWPmYBOPWyIQ7Fn/OYfGMbh+LIvcQy+rFBB7tRrRgHKvPwXMIC/BMt16Qh+2njgAWPVzkWFqE2BwEfB/Di0AEccwE4/zzz3caMZQFxNzwnSSJSCaE2y6Ehf0RtaYef/zxkSQ7PkGAFxS5jvAWj+g3ChP6iLIErbvfcMeCN8oVrPUoYkgYRKZlJrtMmjyhSIFwG+desegmWqLF14v3ye8GCw79ChIeBGTtZTKHyznu8liDEL7BA4r1OwzyyIf9Lk0rrsDZeKP2ckqvd6Vfm4sc1JNUSTBCnyebUWoIEGeFptwrdaJroRxFkcmkhzVAo+nue+5wXjTvTn5YaraeKbvpT+PoW0WGvCxy2mMih10j0uOwlbKw6ofy9Mv3yg59tklLIx/dnh2XRMB7cvn8EP4qikjCfxDmmMAyPhMOEVQe+rI+dAchAS8oJqgoKoPkcwFwzreJ1YVEMJ6SKR19ueAn8xUoWuEfLGP74SCw2WZgPT8cZqW4/OrOMA9J15jgx2zGb6OSCOBdRcLGWG7ulAy62Po5WZDDEHWzZ271tYbCtdFx/Fi9yNvyP7pdrdsZum2vCqnfND70OrWo9lDvs0wynCub0GmDEzhDR9AYGgIZIIBVF1eV6dOnOws5brRMDHD3JCMsxIQDV2eEJF78CGm4TuPSFrSkUzaZosNfJxMrWm8mK959g0QPuJcTVwyhfMCNm3N+4uIurP8HL6wnaOIgJkDsBze
06yhGcCVBgMatC3cStPScx6XEE0IwWd4QEHFZDy6/Em99Ul833ieKF+Kcg4QVAKsOmRZR8qDJR9gllpt75cUabDtYN5/2a1SpK52bDMinLpXqCzGc0MpFxZ+p/N+y+eEuC22HRrumUtzKJEAA961YCsnoKniuELqBMILXT59+W8vgM8+R3QdrHPS165yw2WN/TSDXTaS6GtHqbSzStpfINuogMWCIyEHXrJTGXZa4ONIBe+0Rzd6OM0AAjyEoeoznHGM3sWLeOu0zgnPNE55ZuN1SDoW290IhgZQnLCu8UwjPYfzlHUC8GdZVxnPvCRUUSn3dZJ/E6UPBbPvJ6lSE64vVYjVR3Z4XJPEMCvNeixM/zlaWaQy0KXdgiiuJYsooXAQYn6M9woIt8PtEIOW36cOgmFM10jnSgzrXUh2gnK5ebmR12VG3drrV0K2JblvqxuwAs8Vgdc9foKF3fE/ywdpZVftkZAgYAmWMADE5JLhhUMElDQEoGHPru0NsJNZC3FeZECCE+thgylAnkXstQldQ8CL2C/cqgtchn70V916sHp5wi0Jz7t1tOY+1M2jxTNSu54OrN5Mf4ipxBccVHm+AaCLmEusogineB6la2oNuYdE80cp64d1fQ9gdplpXNtrCNY2Y6kSDv6+bT5+41c5YkL9rLGKQ3KhB6i61e3e6Ufq2+dvqnU9YF2JfRo4c6az1jBVBd/bgvfA75LeGUgsPC7Lg1mkscvKDOq4UG6mCxWPu84wPuEzDKlQgfeved1ybicaFhn26Oz4Nt9QKRjERILQF8oJbsBDPE4UZ6xvjPhdcqsqX88tNYQWB8ORA0cc4jJIR8kvLMT4yTqKARDnoQy6woOJFQxJDnmc6nh++33iS8N0Lvq9c4xX43xQNH2FrqsJ+B1XktNet9nqLby5uG0+dYvpSP3YOuYmvNM67pVPGhsx4g2eH8oewoUTjMwpyxmc8xHxuFkwRJ+mWquDWUcvq8CyP6MacZ7KGW4xU9+h0qIYugddifc4L9rOhVPudTRvlXhetHQ8XV8RkhAYRixICgJEhkGsEEICIiUpGDDxsYZEXND0/Bj4EryBxzscUB89nso+1pY0G0iciJjU+cUWiculci2Ul8PX5jRfq75zlUUbK2f5W8vIzlUy1jTfqIId0HSHtG/XPy3soxE7hMktMDwojfudseA9E7zP2eGq6cUMnbJ7xlD+T3mfPA0XYbtxT19W98Fy5+abbYjI4/IMpMc/byb8RIJ8CFD1G+xJ+2alYCkrK+Jh/whJwq2bcxc0VLxqUm4TO+EzGCKy45fL9wK0WgRPLKO8CXLNRRpBxM3rJFd+XWJ/BUAbcA6PjS2PVqWjn5uvEnm2CxtW1UoEewRMBtHL16qHeKl5Rdes2Vat0LgTO6SU8kULt+AbMjN8YiYP8eBxvfPZeafxm8VToo5jFXpsjOZgDtcg2ut2pllVyhfjVNZLXFGmsHnX7jxmTStGkZf5+4yQtWrgFit0OUus/E9REk9TUuFgpQ8AQMARyhwCxjpvU6yE/L/s0d41kyZnEQYmSBnVvfoRmoR0hNavWz7Ilqx5EAGs98dWp0rEnHCEL5i2RQY+mWiN+ucOvE7nlkttlj933SmtSE5/jhncFLxDcWr3gmQ4CCIg+GR2T1GgiuRux9N4zhHh6whZYVg0vFMgvRO+tZyQOSkfgROEBIXhuiMKmu/nAvx90ks/2vnrxIHiu1AzSYdKOO/bWBE1PqSWa6L2waIa65L+reRXUt94oVATI3cGWKh2ky6mo40nGwqZvhxb30o0l6EguGrZy37eT6NNiOBOhY9cMAUPAEMhTBJrVDnfiEvZtJhI49+50kxy95X9DETZv2KNI2HZvPyzsW6jw/LBqPfnYs7KTrq5Svzj0Lqt7btNDZNvDNLnQmSe6jIxZMdtAK/ukO/GSPiWChTU3IZYlIVGQ37wwSUw+3l7E4EM+0z3uuZ4OPFBN1UoIo5CPKXUHKfwj/ANKtJJACmwqXJF1am2eoZlKf9K46Wv07g7SrZomD8yWhg69RHM4fKJswvR4uVEt3Ts59+1s+2f1M0fg0ksvlS/VxT5Ty2Z0ywfoiQ66na25PMqDNggLZ3kAa20aAoaAIRAmAqvWLZEZ89+Q6QtGafzmG/L72kVhsg+dFwLn3Gkl2TbZqKOzarZvtEvJC3ZULgjcdOt10lW9mXc4OrzmeyuvF6YudQnAfAbq8LhXfE7E05IUaPbs2S7ZR6p3TDw6cZoQVkzveuvrc40lc4jLZNkbBFti7CHCiLBIEt6BCy5E/gCIOkEiyVTQHZtru+++eyTTt4/7Zz1Oo9gIYLHak0u6bu5LmmQIy2d7XXKotuZoSJe22247QTC56qphWvVE3bZKl0VU+ZF6/JDmevgu6rwdliUCrK9JIkPNzybJg65S7xlC5y0ax3m4Jok85BDSDpUdmcBZdlhbS4aAIWAIpIXA/JUzVLgcpULmG/LtwnfSqlvehaNjOLs3P9LFa9aoWq+8u2btKwJkpZ3y6Reyd8i5mqpvJNKg7Qr5/KvibKsGdnoIsGTClVde6dYJT6emzzJOwrdoYRM+rKGK8IjgictucA1yBMiTTjqpRBw/sWW40uKmGyQSmERT0JrJ+uYQiY2MkiMwT58Jm4/3dMmG9FlV1rwHqdKVVw7VLPOj1J0aMfa3VKvFKXeAJjR82i2vF6eAnS4DBGbOnOlayVZ9EN1VHO1b60oDkydNMoEzGhw7NgQMAUNgQ0Lg+8XvOwsmlsxflk8t2FsPutTuoy60O7e5oGDvpSJ2nDT7a9esk5Y58MxuvJnItNEkMjFKFwGS9pAMBsGOjLTRxFImsTIBs+QVWzwirtfH9sZa348sltFL6fhJLzx93Gc8/v78qFGjXDI2W07DI5L6p4/3HLc+3hPL52Yac5cKffXVR4p7K5k/HyvpaN06pVItUOY/uj9Es8jXVcPrvdK5c0dLGhRAp6x3SRZUTwXDZuoCHza1Up7j332Xxd7DZp2Qn8VwJoTHLhoChoAhkFsE1v75u3zx2/Py/FcnydVjW8h9n+wso7+/vqCFTRBD4Fzzu8ixXUeZsJnbr1BG3EkG06xNFdEkl6FT07Yic2b+5hYxD515BWdIdnDW18QaiVKgrKh+/fqy0UZqns6CfvzxR7e+8R133JE1ryy6UfBVfbznG5pp+Km2bWXiv/4lC9bH3Sa6uXnzflD32ku0SGfd7k5UNHBtnu4TwztEeG5Tp37i3KvJUnz11VcHytluWSLwtY7PzXMgbHIP7XSbqutzljWZwFnWiFt7hkA5IYCrExuLeV922WXCWn2x6KGHHnLXfSyOL0OyCTTorB0ai+bPnx/3BXW/LlY8LTDAkRCDPvgkF7H4ce6ll16Kd6mgzy9eNVsm/HiPPPLZvnLF/+rKE1MPk09+fkiWr/61oO8r2PmeHfd2hxutLp0tM1gu0X5nXfh6v9Gj3ZaonF1LH4Hx4z+U5l3+TL9iCjWatS8u5N0rfZXrj68kbCOGhhmV5LlXnM9ddtnFWTcvv/zygrqpq666SlhTOZiEqK
ze9Jd1san1dUkLOIEBACyRKgSy2z1TKWM2E8Z1wfpOZ8Dt0YjNDtu8D3PfVrV0PdRs1RtXptVKtWDaVKlUr2OlIASMhz7T+bcefOHTz7ZAvUbdgMVWvUSRWeeaK3I0j16ZTtYnDGgpC/QkAICAFPJVCrVi28M+RdfPDBByilwi0fqu56S/YFA1w2bZriujLR4LYExn3yMYYMHabrV60YUEeFV33xApBBBa5sPLIKs79YhSEqjpfy9uDB+HTcuNgPvvw3YbynYXzejvJlKtJ2IZAsActRzhIlSujy41SfMmTIEL1drZif6oNi8NHjQMb0wJajwfj1h2D8L9wPd+/FoMuLnTB5yhfImzdvstfy1QLWeE553oLndAuenTti8udfCc8kviwywpkEHDkkBISA7xEYNWoU5syfid/GHcMrM11r/+2bwLIxwNhxo8DRUxHvI3DmzBn0ffExrFi3HVOUx2hb9ZIiIF/8dna9f+tPqKkr56rZd4ZOHI/F837El9/+iNatW8cv7KufGO+pYz6/fjDqeUy5BogIASGQiABHOcePH69HOt9991307doWS/8MwWfqJVf7GuyD4vs2dKkfq+LS9RisPQAMmj8P5Uovw+QvpqJLly6J9PvyDt2nd2uPpav+tZ/nvAWK56+K5zThaePLI0mDbICR3UJACPgugaWLV+KKCjVbP8s1BnPUy+Zs2bPincHvu6ZIznZLAt9//z0KFy6MjJHbEfYhMKBFYmPTsuLF/YHBrYCQ4SqRa/qzaNOmDTihu4gFAT/1WFK2M9DmF6DrKTW/5yQ1v2dDiwKyKQSEAAlwlHPSpEm6Dzp5KAR7g4CBjyTdB/lnA56tBez4v3uoU/QaunbtqhchGkvA6NNPHvzXMZ7vk+f1WJ5iwFv9OonBaRWL7BQCQsCXCVSsWBFBQUFY/6MakRoKXHMw+QpdaMerpC2n1Jvk69eiMWWKuNN62/dpxYoVYMbImT2BBf2AEg54ptVUHnD/DgMmdgR++OEHfPLJJ96Gx5z2ZCus5vd8Q80npJILddih5vdUlnqu0uboFi1CwMMJFC9eXLdgkHIO4EusSkXsbxANzz8GAV++CPz000/SByl0Rp/uMs+ffxaeVr6KYnBagSK7hIAQEAIjRozA4cOHkf1uWXzdzQ97VtvHZLXyCKQbbb16dVXCmBh8+umnGDhwoDZg7dMgpTyBQMcOT2O4io/q5sLgGx9spnZVLrZDh2Lnzp2e0Oy0q2Ne5atc7yOgUxjw5CqgYm8VTJUj7eojVxYCaUyAfVDdkrEvrpytSv/m0gcZ7Dp2eEZ4GjBSYJ0hBXSKSiEgBISAVxAoU6YM9uw8hJEjR8aOeE7PhFxFbsFfJYUpEADkV4n+LhyDnruTU59EHs+Mu7f8MHr0/zBsmBrCUvL2228jX7582nXywoULMtqpqXj2n47tH0EN9T346GnX29G3KfD3IeDV3l3wz7+7XFfoCxqKPariPdXSdGpshlsmG5J4T1+489LG+wQ6Pt0GfvduIXiw60ji+qCXO+GfkL2uK/RADR2feULxvGkyz46K534PpJEyVRaDM2W4ilYhIATckECWjHn0vJysGrftFY52NmnSBAcPHsTe/TuxZ/92HFx5DH8eOY0y5YujQuVyeKJ9Q5QrWwHVq1cHs91aSs+ePbXR+fzzz4NG58/K5UbEdQKrwoLwZ9hIrWhs6/hJMvTOIoEqXez96+QMuL/h2oousGvXrcda5Y5mlox5Bnh4wmGdYZIj4iJ2EtDxnsonsKxarqu56Ti9Co3Ps5vsVCDFhIDnEWAfNP+XP/DdS0D2zObUX/dB4494Rh9UZ0Rso6306efPn0f+/PkdgqJ5LlmRAjyPegZPh2g5X1gMTufZyZlCQAh4GIGiOWuiX71gp2r96KOPgouz0r59e6xZswY0OpksZv78+ciVK5ez6uQ8ewgUDQS4mChrf/8F77S6hapqlNssYTKhVx++ge+XLNAu2Gbp9Sk9jPespt4CcLmg4j1peB5Ry5UjPoVBGuv9BNau+g3PqxdpLzcxr626D2p6E98vnuv+fVCdIJsNX7JkCZYvX47OnTujY0cVJG+HrF39B56umRI8b+GrOT+6P087GJlRRGI4zaAoOoSAEBACdhDgKCmNzpMnT+KRRx7B0aNH7ThLirgTga0hm1G1qPk1qqjspQOHwxEVFWW+cl/TmE/NC1F/dGy85xN/qERDyvddRAh4CYGtWzagUQrkztJ9UNhxj++DfvvtN7zwwgvImTMn+vfvj+Dg4CTv/NZ//0mxPj38xBntGZVkBXzkoBicPnKjpZlCQAi4BwFmwF27di2yZ8+ujc6QkBD3qJjUIlkCnJ9tT9hpU0c3jYtWj004acrDychTQfBTmXMDxxvafXhdXM1D0+xbHwYgTfcmAroPOhyBhilgcJrZB7kD86tXr+os4C1atEBAQAAYGrN3b/wYVc3z0IkU7dNDQ0NdxhGer4fu09mvR2ZTw7EeKOJS64E3TaosBISAZxMoUKBAnHtty5YtMW/ePO1m69mt8v7ab9++HQVyZ0AJ/zumNzYgn0pClTO9Njjr1FH+ciJCQAgIgQQE2AdlzphOjXDeS3DE9Y/sg/Ll8AOvwYR5nijR0dHxqn3z5k39+dixY3qqklGjRqF+/fp46aWX9Cgo20qpkgJeK7pPV4m0aXDSxdfXRQxOX/8GSPuFgBBIEwLp06fHokWL0KdPHzz22GN6LrQXX1TJT0RShQCnIaGxnyVLFmTLlk0vOXLkABe6YnEE2tjPNT/TNSt/jpRzDKpaPIMpI5ypAlAuIgSEQKoT2LRpExqVz66umzKu91WK+KF37956SfXGmXTBjBkzWtV048YNvf/ff//Ftm3bMGDAAFSqVEnvS4kwCSqmXjNGOHUlPfyPGJwefgOl+kJACHg2gW+++UZnsO3SpQsuXryofwQ9u0WeUXtmE86dOzfCwtS8jlYkXbp04EsBPz8/vdAVum/fvhi18U8rpc3ZtffkHfQuW9YcZaJFCAgBryNQr149fDLmeoq1a9/pGPzvf/9D69atU+waKal42bJlmDBhgtVLsD+/e/eu7ve7deumRzivXLmCJ598EvtOAZWKWD3NpZ17T/nhxSc80wXWpYZbOVkMTitQZJcQEAJCIDUJfPzxx9rofP311/W0KYw1EUl5AsxiOHHiRBhvvi2veO/ePXCh8I0/XwycOHECr732Gk5fAQqbnGA4IlLN5nHlLsqKwWl5G2RbCAgBCwJ8UXb95l1s/Q+o/ZDFARM22Qedi4rBE088gQYNGpigMfVV7Nu3T78gtLxyhgwZcOfOHTzzzDPatfXZZ5+NO8w+nbI3BQxO3acrngmnSYu7uI9tiMHpYzdcmisEfJnApehwhEbM0AjqFO0O/6wBboNjyJAh2ujs1auXNjonT57sNnXz2IocnA4cjL3faDRRBSjFf9PMZBJjxoxJsnm8D3wRQClevDhKF/NXb8MvmW5w7joZW41y5crFbshfISAEhEACAroPKp4P/4ZfMN3g9Jg+6NcWsVTyqmzUjSclIBT
7kaESfJHYqFGjuHhNf381/1QCieVZAPtOn0twxPWPBs+aNeP/7riu2TM1iMHpmfdNai0EhIATBGhwrgoL0meW9m/uVgYnK/Xyyy9ro5NzdV64cEHHderKyh/nCESFAxHBsefejNRrTklDt6ulS5dixYoVyJQpE27duhVb5v5fPqxw8vBZs2ahefPm8Y7xbfXeU2vQokK83S5/OHgWKFm8MPLmzeuyLlEgBISA9xKoVbsu/j36O15pZm4bdR9UrKD790ERwbENj4mxCoBGZPfu3bXLbIUKyXfUterWw76I36zqcmUneRbImwsclRYBUi77gdAVAkJACAgBhwk89dRTWL16tc5iy2RCMi+jbYRl/APxaJkRerFdKvbI/PnzERgYqEcphw8fDmYKXrBgAfr164fMmTPHO71NmzbYsWNHImOTharVbYZZoTnjlTfjw+LdOVG7XiMzVKFkpnA0Lw/ULGGKOlEiBISAGxGoVrsR5oamx1+HzK3U4l1ZUbt+E3OVprI2GpiMy2ccqj3GJqtXrWZ9LNmRAjx3ZsLDzVuaQiDL7dO6T2e/nuHeVVN0prYSMThTm7hcTwgIASGQDIGmTZtqo/P48eN6rs7w8PBkzvDNw6XzBqJVmSC9JCRAg/Gvv/6K2z1v/jydkZCTgjM504wZM/Dcc8/h6aefhpE6n4UZP7tkyRKbb/l5PCZbYfSYHqfa5Y0xK4HNh2/i008/dVkXFfTINx3Bg4FJHU1RJ0qEgBBwIwLsg6pVLodBc82rlO6Dwu6a1geZVzPHNPG301HRPKtUMJ/nkRjTeBa+slL36ezXc9w47GgT3aK8GJxucRukEkJACAiB+AQqV66sRzmzZs2qp++Q1Orx+Vj7tHHjRnD0smrVqmDcDI1OQ+bPm4+vvvoKjz/+uLFLrx955BFtXHI6lIULFyIoKCjecWsfPvv6R8zYCCzcau2oY/tCjgHDFwOzfp7jsXPfOdZiKS0EhICrBD77ajq2HQe6T3dVEyB9EPDZl9+nAM+50qdbfD3F4LSAIZtCQAgIAXciUKhQIW10MgaEc0b+8ccf7lQ9t6gL3Y8HDRqE0qVLo3HjxuAIJrMRcq41I9lPchXlSOfWrVthmb0wqXOYwZGGaYepwHkXvJtU3g/UGw10eqGjrnNS15RjQkAICAGDgNEHzVQvvn7fY+x1fH3i0v0+qOOzPt0Hmc6zw1M+zdPaN1EMTmtUZJ8QEAJCwE0IMKX74sWL0aFDBzC2cPbs2W5Ss7SrBpP+cE7MwoUL49FHH9XGZZ8+fbB7925s374dH3zwAThfnb0ybdo0OJodlm5Y7ds9iQLKxen79fZe6UG5Pj8CpYYDBQvkx+w5JvrGPbiEbAkBIeDFBHQf1L49HlMJzd9b4nhDhywESrwL5PXPg9lz1QcfF1N5znfihng5fzE4vfwGS/OEgBDwDgLffvst3nnnHbz44ov44osvvKNRdrYiOjoac+fORdeuXZErVy60Vw9ZR44cwbBhw/R6w4YNertKlSp2ajSn2C9Lf8Xvv/+OXjOBp74E1oclr/fPfUCdj4Bv/wEGDhyIM2fNT8effC2khBAQAt5A4JdfftF90OgVQLNxsCuR0GGVPbXt58A45TDTpUsXXLiohjlFNIH4PP0c5/liJ+Fp47sk06LYACO7hYAQEALuRmDs2LE63nDAgAF62hRm4vNW4bQwnLrEmMLk7t27aNeuHcaPH68NTrobu4O0bt0ap0+fxpCBvfHwJ7+iQuF0qFDoHioXAWoUB9Kr17qcVHxPBNfp1PoeWjatjw1zJuk54tyhDVIHISAEPJdAXB80qD+aj1uMCkUyokLB23F9UIb0wP7TalH90L4z6bBfLaVKllDTPn2gDU7PbXnK1DyO55uvKZ4LUaGo4lnAFk8/xTa94llc8fxQeCZxS8TgTAKOHBICQkAIuBuBoUOH6rk66UJKo+yzzz5ztyo6XR9m5TWMTI4cMmESRzOZUZbrnDnNn47E6cpanEjjd+bcZejQZSn279+PAzv+wd9q/e2mCPj5+aFKueKoXL0KenVpgrJly2rD2eJ02RQCQkAIuERA90GzF6FD5/t90K7N+HvfHtUHnQD8gIqli6JSxYro+ExjlC5TRucEkDl/bSPXPH9egA6dDJ7/Kp67E/CsoHg2UjzLCU/bKOOOiMEZh0I2hIAQEAKeQaB3797a6OzYsaM2OmfNmuUZFbdSywMHDsQZmX///bduF43LRYsWaSMzfXr1et6GhERMR2jEDH20X921iUsVCVT+q/d35wxIfNzkPaw3F+AdkzWLOiEgBIRA8gQe9EHJl/XYEnVGxFY9Vft0j6XlNhUXg9NtboVURAgIgZQmwHkbx7aOSenLpIp+ZmJlhtbnn39eT/Uxf/58cGqPlJDIyEgcPHgQBQoUQKlSpVy+xLZt2+KMTE73UqJECW2ovffeezoxkr0XuBQdjiMXg20XLxoIcBERAkJACAgB7yBQJ8g72uFjrRCD08duuDRXCAgB7yHQrFkzPW0KjU5OmzJv3jyULFkyXgOHDBni8OTTTMLDbLjbdm/EoYOHcTbicpzOLFkzokyFEqhaqSYqVagOZvazR9avXx9nZO7btw8VKlTQRubEiRPhzGTd9lxTyggBISAEhIAQEAJpT0Cy1Kb9PZAaCAEhIAScJsDMrGvWrEGmTJm00cn5JA0ZM2YMxo0bh5UrVxq7kl1//fXXaNKkCVZu+hbRuUJR/ZnLeHECMEhlee+nvFeffPc28tU+gl1nFuHjTz5Epaplwcx+1mTVqlV6LsyAgAA8/PDD4Ge6AYeEhOhYx08++USMTWvgXNw38lQQ/PoBgeNdVCSnCwEhIASEQJoTCM/XQ/fp7Ncjs9VM8/o4UwEZ4XSGmpwjBISAEHAjApyPkkan5Ujn0aNHMXz4cJ20ZtSoUXjssceSrPHVq1fxWLtArA8OxeNvAdXa3EhUPlNWIHdhoEzD2EMR++5g3XdhePrpp9GpUyf89NNPcVllmV323Llz2nh99dVXdaKcSpUqJdIpO4SAEBACQkAICAHvJiAGp3ffX2mdEBACPkIgY8aMWLJkCXr16gWmdTckJiYGGzdu1O62HF20JseOHQNHIQNqA6/PB7LmslYq8b6iyn7srOZ+Wz8LmPPjHMyZM0cXatWqFThlC6cxSejim1iL7BECQkAICAEhIAS8mYC41Hrz3ZW2CQEh4HMEunXrZrXNQUFBVvdzZxM1L2SlQKDjGPuNTUtlTbqqvKy/x+4ZNmwY/vjjD3CuUDE2LSnJthAQAkJACAgB3yQgBqdv3ndptRAQAl5IYOfOnXjqqaestoyJer799ttExwa80Rcnj59Fu2GJDjm849kggHGjlnGkDiuRE4SAEBACQkAICAGvIiAGp1fdTmmMEBACSRGIvhOJI5fW6YXb3iQnT57UWV+vX79us1kjR46Md4zzXn4x+Rs0sT4oGq+sPR/KNmLsJ/DmO/3tKZ7yZQ5OB35tEbtc2J7y15MrCAEhIASEQMoSMPr0DYNS9jqi3VQCYnCailOUCQEh4M4ETl3Zjq
lbAvXCbW+Sixcv4rnnnkPWrFl1oiBrbTt16hQmTFApZ+/L6LEjUaK6cqlVLrFmSQMVJro1dDsSGrdm6XdIT1Q4EBEcu9z0rhcMDnGQwkJACAgBbyEQERzbp8tLRI+6o5I0yKNul1RWCAgBIWCdQLVq1TB+/Hi9zJgxA1OnTtXJgrJkyYIbN2Izzt69exfMWNu/f39tmG7auAkNu1vX5+zevMWBEjVvYdfebc6qsPu8Mv6BKmWuncX5VtyatF0LFA20diR23zQ/28csj9RR85EmNSH5skDg1DrLM6xvm6BnRBEguLxaDlq/hOwVAkJACHg8Afan1vrnIs2BdsG2mxcaBISOtH3c8kjfGMtP8bcjgmO9Z+Lvtf4pKT08w1o7LDQFqu3gwZ491ZWMcFrcUNkUAkJACHgDge7du2PDhg0IDQ1Fnz59kC1bNqRLF9vdR0dHY+zYsWC8Z+TFa2CmWbMlbwlgz96UH0EunTcQrcoE6cVqG3IGWN3tCzvXqoeTSdaTEvtC86WNQkAIeCuBfDW8tWVJtqu5eokYMxXIcO9qkuXc9aCMcLrrnZF6CQEhIARcJFC7dm1w4cinMeoZEhKC0aNHI3/+/MhVIB3yl7zn4lUSn56/JBAyPyLxgdTeE/A00Ei50t5Siy1JzijliKM9UiQw6VIVeiQ9kmqcbYKedevWITg4GJNWA4vdJJzWaJ6shYAQEAIuEWg0SXmLBNtWkVyfzj62ju3T7T7C69j7+5Cc0mT0hIeH69/w7ceBNybmSE6bWx4Xg9Mtb4tUSggIASFgHgHO0dm7d2+9bN68Wf9wTftmKopUMt/YZK0LKjfXmzduIywsDGXK2Ovzal574zRlyqOyGA2K++jURlJuso4oLN/DkdK2y9qhJ/jXkQhaFmxbhxwRAkJACHgqAYZAJBUGkVy7XD3f0K8NziDjk2vrZH5nwqOCVZ8+Q1/jDdeulGZni0ttmqGXCwsBISAEUp9AgwYN8OWXX6JYsSLImCVlrp/dP1bvhQsXUuYColUICAEhIASEgBDwGAJicHrMrZKKCgEhIATMI9C82SO4qNxzUkLOHonVWr68CjoREQJCQAgIASEgBHyagBicPn37pfFCQAj4KgHGdkbsU66vtqftdBrN2TCgUFF/5MmjXFpFhIAQEAJCQAgIAZ8mIAanT99+abwQEAK+SqBGjdhMfzQ6zRaOnJavIKObZnMVfUJACAgBISAEPJGAGJyeeNekzkJACAgBFwkUKlQIFaqU0qOcLqpKdPrVU9lRo0q9RPvN3nHkYjD+DBupF7N1iz4hIASEgBAQAkLAHAJicJrDUbQIASHgAQQ4b+PY1jF64bavS5+er2H9j0DEfvNI7PodOLjlGjp37myeUhuawi4FY1VYkF5sFJHdQkAICAEhIASEQBoTEIMzjW+AXF4ICAEhkFYEBg8ejIZNq2Pdd+bUIPoKsGIC8NVXX6Fx48bmKBUtQkAICAEhIASEgEcTEIPTo2+fVF4ICAEh4BqB5UvW4vhO4PAm1/Tw7GVjgIcD6+GVV15xXZloEAJCQAgIASEgBLyCgBicXnEbpRFCQAgIAecI5M2bF7NmzcKiEcDpQ87p4Fk0NsO3qhHOZWucVyJnCgEhIASEgBAQAl5HIIPXtUgaJASEgBAQAg4R6NKlCwoWLIjWrVujxhNAgxeAPIXtU7F+FnQcaLESBREe/i9y5Mhh34lSSggIASEgBISAEPAJAjLC6RO3WRopBISAEEiaQKtWrRAVFYUKBdpgWndlRCpDMky52d68mvi888eAnSuB2UNijc3XBvbBif/OoGTJkokLyx4hIASEgBAQAkLApwnICKdP335pvBAQAkLgAQGOTs6dvhItGn6NCZM/ViOXyrJUUrgcULwqEHUBOLXPD1fOxSB33mxo2KgBfvp8JJo2bfpAiWwJASEgBISAEBACQsCCgBicFjBkUwgIAe8mEH0nEqeiduhGFslZA1kz5PHuBjvZOib94RIREYGtW7ciJDQEW7ZsRp0qJdCkd1PUqFED1atXd1K7nCYEhIAQEAJCQAj4EgExOH3pbktbhYCPEzh1ZTumhrTQFPrVXQuZizPpL0TRokXBpW3btkkXlKNCQAgIASEgBISAELBBQAxOG2BktxAQAkJACLg3Af+sASjt39y9Kym1EwJCQAgIASHg4wTE4PTxL4A0XwgIASHgqQTqFu0BLiJCQAgIASEgBISA+xKQLLXue2+kZkJACAgBISAEhIAQEAJCQAgIAY8mIAanR98+qbwQEAJCQAgIASEgBISAEBACQsB9CYhLrfveG6mZEBACKUhg2YE3kSVjHhTNUQPtKk6yeaWQiOkIjZhh87jlASYisiURUdvBa9ojZulpV2EiiuasafOSRgIlmwXuHzBLT52i3ZN0gV22fxAirsZmEU6qTsnpSepcOSYEhIAQEAJCQAikLgExOFOXt1xNCAgBNyFAA1BLTEySNboUHY4jF4OTLGPPwRu3I1NdD6+ZlNjbLrP0JJfgh/fkyKV1SVVZH0tOT7IKpIAQEAJCQAgIASGQagTE4Ew11HIhISAE0poAp0F5+KE3EGdsqgolNQLI+pqVCZWjqWYYSo7oYdmkxN76mKWHLJOS5O6FcW5yeoxyshYCQkAICAEhIATSnoAYnGl/D6QGQkAIpCKBpNxnrVXDrEyoNKb61Qu2dgmH9pmlhxc1oz5m6nH03jgETgoLASEgBISAEBACaUJAkgalCXa5qBAQAkJACAgBISAEhIAQEAJCwPsJiMHp/fdYWigEhIAQEAJCQAgIASEgBISAEEgTAmJwpgl2uagQEAJCQAgIASEgBISAEBACQsD7CYjB6f33WFooBISAEBACQkAICAEhIASEgBBIEwJicKYJdrmoEBACQkAICAEhIASEgBAQAkLA+wmIwen991haKASEgBAQAkJACAgBISAEhIAQSBMCYnCmCXa5qBAQAkJACAgBISAEhIAQEAJCwPsJyDyc3n+PpYVCwGcItGjRwmfaKg0VAkJACHg7AenTvf0OS/t8hYAYnL5yp6WdQsAHCAQHB/tAK6WJQkAICAHfICB9um/cZ2ml9xMQg9P777G0UAh4NYH69evjiSeewPXr1726ndI4IeAogfLlyyMwMNDR06S8EEhTAiNGjMD27dsRGRmZpvWQiwsBdyNQoEABVK9e3d2qZVd9/GKU2FVSCgkBISAEhIAQEAJCQAgIASEgBISAEHCAgCQNcgCWFBUCQkAICAEhIASEgBAQAkJACAgB+wmIwWk/KykpBISAEBACQkAICAEhIASEgBAQAg4QEIPTAVhSVAgIASEgBISAEBACQkAICAEhIATsJyAGp/2spKQQEAJCQAgIASEgBISAEBACQkAIOEBADE4HYElRISAEhIAQEAJCQAgIASEgBISAELCfgBic9rOSkkJACAgBISAEhIAQEAJCQAgIASHgAIH/B6DdOKUBt9vbAAAAAElFTkSuQmCC" + } + }, + "cell_type": "markdown", + "metadata": {}, + 
"source": [ + "### Introduction to Neural Graphs (NGs) \n", + "\n", + "The Neural Graph is a high-level abstract concept empowering the users to build graphs consisting of many, interconnected Neural Modules. A user in his/her application can build any number of graphs, potentially spanning over the same modules. Once defined, graphs can be trained, exported/saved and imported/restored in other application(s).\n", + "\n", + "![neural_graphs_general.png](attachment:neural_graphs_general.png)\n", + "\n", + "The import/export/save/restore options combined with the lightweight API make Neural Graphs a perfect tool for rapid prototyping and experimentation.\n", + "\n", + "\n" + ] + }, + { + "attachments": { + "neural_graphs_nesting.png": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAq4AAAHiCAYAAAApjU6MAAAKx2lDQ1BJQ0MgUHJvZmlsZQAASImVlwdYU1kWgO97L73QAhGQEnpHepUSeiiCdLARkkBCiTEhqNgQEUdwLKiIoCLgoICCYwFkLIgF2yCoiH2CDCrqOFjAgso+YAkzu9/ufnved3L/d3LuOefe7958JwBQCWyRKANWAiBTmCWODPRhxCckMvC/AwSQgAogADybIxExIyJCASpT499l5C6AxsfbVuOx/v37/yrKXJ6EAwAUgXIyV8LJRPkEqm84InEWAMgB1G6wNEs0zldQVhWjBaL8aJxTJ3lonJMnGIOZ8ImO9EVZHQAChc0WpwJAMUTtjGxOKhqH4oeyjZArEKKMvgNPDp/NRRnNCywzMxePswxl0+S/xEn9W8xkeUw2O1XOk2uZEIKfQCLKYC//P7fjf0tmhnQqhzGqFL44KBId6eie3UtfHCJnYfKc8CkWcCf8J5gvDYqZYo7EN3GKuWy/EPncjDmhU5wiCGDJ42SxoqeYJ/GPmmLx4kh5rhSxL3OK2eLpvNL0GLmdz2PJ4+fwo+OmOFsQO2eKJelRIdM+vnK7WBopr58nDPSZzhsgX3um5C/rFbDkc7P40UHytbOn6+cJmdMxJfHy2rg8P/9pnxi5vyjLR55LlBEh9+dlBMrtkuwo+dws9EBOz42Q72EaOzhiioEf8Aeh6MMAMcAOOABb9DMMgCzesvEzCnwXi5aLBan8LAYTvWU8BkvIsbZk2NnY2QAwfmcnj8T7exN3EaITpm0iNL4reuaR6mlbsiYAzeg50iBO2wwPAqAYD0BTLkcqzp60jV8ngEV/CxSBKtAAOsAAmAIrtDIn4A680YqDQTiIBglgIeAAPsgEYrAUrARrQQEoAlvBTlAGKkA1OASOgGOgGZwG58FlcB10gR7wEMjAAHgFhsAIGIUgCA9RIRqkAelCRpAFZAe5QJ6QPxQKRUIJUBKUCgkhKbQSWgcVQcVQGVQJ1UI/Q6eg89BVqBu6D/VBg9A76AuMwBRYFdaGjeFZsAvMhEPgaHgBnAovgXPgfHgzXApXwYfhJvg8fB3ugWXwK3gYAQgZoSN6iBXigvgi4UgikoKIkdVIIVKCVCENSCvSgdxGZMhr5DMGh6FhGBgrjDsmCBOD4WCWYFZjNmHKMIcwTZiLmNuYPswQ5juWitXCWmDdsCxsPDYVuxRbgC3B1mBPYi9he7AD2BEcDkfHmeCccUG4BFwabgVuE24vrhHXhuvG9eOG8Xi8Bt4C74EPx7PxWfgC/G78Yfw5/C38AP4TgUzQJdgRAgiJBCEhj1BCqCOcJdwiPCeMEpWIRkQ3YjiRS1xO3EI8QGwl3iQOEEdJyiQTkgcpmpRGWksqJTWQLpEekd6TyWR9sit5LllAziWXko+Sr5D7yJ8pKhRzii9lPkVK2Uw5SGmj3Ke8p1KpxlRvaiI1i7qZWku9QH1C/aRAU7BWYClwFdYolCs0KdxSeKNIVDRSZCouVMxRLFE8rnhT8bUSUclYyVeJrbRaqVzplFKv0rAyTdlWOVw5U3mTcp3yVeUXKngVYxV/Fa5Kvkq1ygWVfhpCM6D50ji0dbQDtEu0AVWcqokqSzVNtUj1iGqn6pCaipqDWqzaMrVytTNqMjpCN6az6Bn0LfRj9Lv0LzO0ZzBn8GZsnNEw49aMj+oz1b3VeeqF6o3qPepfNBga/hrpGts0mjUea2I0zTXnai7V3Kd5SfP1TNWZ7jM5MwtnHpv5QAvWMteK1FqhVa11Q2tYW0c7UFukvVv7gvZrHbqOt06azg6dszqDujRdT12B7g7dc7ovGWoMJiODUcq4yBjS09IL0pPqVep16o3qm+jH6OfpN+o/NiAZuBikGOwwaDcYMtQ1DDNcaVhv+MCIaORixDfaZdRh9NHYxDjOeINxs/ELE3UTlkmOSb3JI1OqqZfpEtMq0ztmODMXs3SzvWZd5rC5oznfvNz8pgVs4WQhsNhr0W2JtXS1FFpWWfZaUayYVtlW9VZ91nTrUOs862brN7MMZyXO2jarY9Z3G0ebDJsDNg9tVWyDbfNsW23f2ZnbcezK7e7YU+0D7NfYt9i/dbBw4Dnsc7jnSHMMc9zg2O74zcnZSezU4DTobOic5LzHuddF1SXCZZPLFVesq4/rGtfTrp/dnNyy3I65/elu5Z7uXuf+YrbJbN7sA7P7PfQ92B6VHjJPhmeS535PmZeeF9uryuupt4E317vG+znTjJnGPMx842PjI/Y56fPR1813lW+bH+IX6Ffo1+mv4h/jX+b/JEA/IDWgPmAo0DFwRWBbEDYoJGhbUC9Lm8Vh1bKGgp2DVwVfDKGERIWUhTwNNQ8Vh7aGwWHBYdvDHs0xmiOc0xwOwlnh28MfR5hELIn4ZS5ubsTc8rnPIm0jV0Z2RNGiFkXVRY1E+0RviX4YYxojjWmPVYydH1sb+zHOL644ThY/K35V/PUEzQRBQksiPjE2sSZxeJ7/vJ3zBuY7zi+Yf3eByYJlC64u1FyYsfDMIsVF7EXHk7BJcUl1SV/Z4ewq9nAyK3lP8hDHl7OL84rrzd3BHeR58Ip5z1M8UopTXqR6pG5PHeR78Uv4rwW+gjLB27SgtIq0j+nh6QfTxzLiMhozCZlJmaeEKsJ04cXFOouXLe4WWYgKRLIlbkt2LhkSh4hrJJBkgaQlSxVtjm5ITaXrpX3Zntnl2Z+Wxi49vkx5mXDZjeXmyzcuf54TkPPTCswKzor2lXor167sW8VcVbkaWp28un2NwZr8NQ
O5gbmH1pLWpq/9Nc8mrzjvw7q4da352vm5+f3rA9fXFygUiAt6N7hvqPgB84Pgh86N9ht3b/xeyC28VmRTVFL0dRNn07UfbX8s/XFsc8rmzi1OW/ZtxW0Vbr27zWvboWLl4pzi/u1h25t2MHYU7viwc9HOqyUOJRW7SLuku2SloaUtuw13b939tYxf1lPuU964R2vPxj0f93L33trnva+hQruiqOLLfsH+e5WBlU1VxlUl1bjq7OpnB2IPdPzk8lNtjWZNUc23g8KDskORhy7WOtfW1mnVbamH66X1g4fnH+464nekpcGqobKR3lh0FByVHn35c9LPd4+FHGs/7nK84YTRiT0naScLm6Cm5U1DzfxmWUtCS/ep4FPtre6tJ3+x/uXgab3T5WfUzmw5Szqbf3bsXM654TZR2+vzqef72xe1P7wQf+HOxbkXOy+FXLpyOeDyhQ5mx7krHldOX3W7euqay7Xm607Xm2443jj5q+OvJzudOptuOt9s6XLtau2e3X32ltet87f9bl++w7pzvWdOT/fdmLv3euf3yu5x7724n3H/7YPsB6MPcx9hHxU+Vnpc8kTrSdVvZr81ypxkZ/r8+m48jXr6sJ/T/+p3ye9fB/KfUZ+VPNd9XvvC7sXpwYDBrpfzXg68Er0afV3wh/Ife96Yvjnxp/efN4bihwbeit+Ovdv0XuP9wQ8OH9qHI4afjGSOjH4s/KTx6dBnl88dX+K+PB9d+hX/tfSb2bfW7yHfH41ljo2J2GL2RCuAoAqnpADwDu0TqAkA0LoAIM2b7KknBJr8HzBB4D/xZN89IU4AVLcBEJ0LQCg67kZHY1QVvQGIQDXaG8D29nL9p0hS7O0mY5Gb0dakZGzsPdo/4s0A+NY7NjbaPDb2rQYt9gEAbSOTvfy4KB0GYP8K27CQmJ6ukVzwL/IP1NgR3fsPECIAAAGdaVRYdFhNTDpjb20uYWRvYmUueG1wAAAAAAA8eDp4bXBtZXRhIHhtbG5zOng9ImFkb2JlOm5zOm1ldGEvIiB4OnhtcHRrPSJYTVAgQ29yZSA1LjQuMCI+CiAgIDxyZGY6UkRGIHhtbG5zOnJkZj0iaHR0cDovL3d3dy53My5vcmcvMTk5OS8wMi8yMi1yZGYtc3ludGF4LW5zIyI+CiAgICAgIDxyZGY6RGVzY3JpcHRpb24gcmRmOmFib3V0PSIiCiAgICAgICAgICAgIHhtbG5zOmV4aWY9Imh0dHA6Ly9ucy5hZG9iZS5jb20vZXhpZi8xLjAvIj4KICAgICAgICAgPGV4aWY6UGl4ZWxYRGltZW5zaW9uPjY4NjwvZXhpZjpQaXhlbFhEaW1lbnNpb24+CiAgICAgICAgIDxleGlmOlBpeGVsWURpbWVuc2lvbj40ODI8L2V4aWY6UGl4ZWxZRGltZW5zaW9uPgogICAgICA8L3JkZjpEZXNjcmlwdGlvbj4KICAgPC9yZGY6UkRGPgo8L3g6eG1wbWV0YT4K+urioQAAQABJREFUeAHsnQeYFEUThuvI6ThyONKRc85BBZUoSUyYMSdQMKMioL8JxIgBFAVFURRRUZEkGclRJWc9QHLmSPf310evc8vmnU13Xz3PsLMz3dU97yx7tTXVVXGpSoRCAiRAAiRAAiRAAiRAAlFOIEuUz4/TIwESIAESIAESIAESIAFNgIYrPwgkQAIkQAIkQAIkQAIxQYCGa0zcJk6SBEiABEiABEiABEiAhis/AyRAAiRAAiRAAiRAAjFBgIZrTNwmTpIESIAESIAESIAESICGKz8DJEACJEACJEACJEACMUEgm7dZfvfdd3L//ffL3r17vTXleRIgARIgARIgARIgARJwS6BXr17y6aefuj3v7YRXj+uaNWtotHqjyPMkQAIkQAIkQAIkQAJeCYwePdprG08NvHpcrZ0HDRpkfct9EiABEiABEiABEiABEvBKYNasWYItWPHLcB04cGCw47E/CZAACZAACZAACZBAJiNgh9EKZF5DBTIZV14uCZAACZAACZAACZBAlBKg4RqlN4bTIgESIAESIAESIAESSE+Ahmt6HnxHAiRAAiRAAiRAAiQQpQRouEbpjeG0SIAESIAESIAESIAE0hOg4ZqeB9+RAAmQAAmQAAmQAAlEKQEarlF6YzgtEiABEiABEiABEiCB9ARouKbnwXckQAIkQAIkQAIkQAJRSoCGa5TeGE6LBEiABEiABEiABEggPQEarul58B0JkAAJkAAJkAAJkECUEqDhGqU3htMiARIgARIgARIgARJIT4CGa3oefEcCJEACJEACJEACJBClBGi4RumN4bRIgARIgARIgARIgATSE4gKw3Xz5s1y5MiR9DML07tRo0bJq6++Klu3br1oxNTUVDl48OBFxyNxIJrm4nz9+/btcz7E9yRAAiRAAiRAAiRgO4GIG64wyDp16iQjR460/eJ8UThkyBDp37+/bNy48aLm11xzjRQqVEhGjx590blQHDh//rxbteGei9uJOJ0YMGCAVKhQweko35IACZAACZAACZCA/QQibrjOmzdPNmzYIB999JH9VxekxvXr12sNW7ZsCVKT5+5nzpyR559/XkqWLCnuvJfhmovnmf539ty5c/LYY4/J//73v/8Oco8ESIAESIAESIAEQkggWwh1+6TaGKwwXhcvXixNmjTxqV84Gv3888/y+++/S5cuXUI63IkTJ+TFF1/0OEa45uJxEhdOrlu3Tu6++26ZP3++L83ZhgRIgARIgARIgARsIRBRwxXexc8//9xxIWPHjnVpuB49elTwGD1//vxy+PBh7aEtWrSolCtXTrJkSe80Pn36tJw8eVJy5swp2bNn17Gr6FOlShWJj493jOVpB97EY8eOScGCBXUYQ65cuVw2xzgIMYiLi5Nq1arp8ZwbIkYWbTBPtMmXL1+6JmfPntXXZA4eOnRI64FOXK+vc9m/f79s2rRJSpcuLaVKlTLqHK8wjhGWkTdvXn1sz549sm3bNv2YHyx9lQULFkjLli19bc52JEACJEACJEACJGAbgfRWn21qfVP0xRdf6IaNGzfWr4glPXXqVLrOMLZgwBUoUEAQ5wljsmnTptrgypo1q7z//vvaIDOd3njjDd22YcOGUqNGDalcubI0atRI62jTpo3LRVimr3mdNm2a1oExsU2ePNmc0q9//PGHtGrVSvLkySN169aVOnXqSI4cOeTmm2+W3bt36za//vqrYA6IkcV8cY0wnG+88UaBkWlkypQp2gA37zFfjJmQkKAXrHmby+rVq/UcihQpIs2aNdOGK3hNmDDBqNSv3bt310bzmjVr9LxKlCih2xcrVkzP+/jx4+nau3sDgxfywgsvyFdffeWuGY+TAAmQAAmQAAmQgO0EIma4wiCF0QkZOnSo9ojCs+psJFqveOLEidK2bVvp2bOnNtBw7qGHHtLxodZ22P/rr78E3stbb71V98GxWbNmaSPPGF845kpgHLvzKmJ+tWvX1o/JYfRhLgglgFE6adIkgccXsnTpUlm+fLk+fskllwjaQmDs9e3bV+/jn
8TEROnWrZvjPXRdf/31eoPH2NNcEFoBwxnGKzzKDzzwgGAscLz22mvl448/dug1OzCyMS+MY+b05ZdfyltvvWWaeHytVauWIFQAi7JguFNIgARIgARIgARIIGwElAHpUQYNGpSqJqM3jw39PDl79mytUxlPqepxeapanKTfKyMunSYVIuAYX3kRHeeUgZj66KOPOs7t2LFDn3vllVf0MeV9TFWP+x3tlXGXirFwLX369HEcVwafPqY8n45jZqdBgwb63I8//qgPKYPQoeO6665Lp//AgQOpypA0XVNViEAq5qIe0etjagFW6rBhw7Q+zGHXrl2OtsrAdhzfu3ev47h1x3kuKoQgFdcIXcoITVWeakdzc8+UMZ2qwiT0cWXw67bQo7zC+hjmZBiqzACO/r7u/PDDD1onxqGQAAmQAAmQAAmQgDsCrVu3dtg67tr4cjxiHlfjDbzjjjsEj/y7du2qbDARZQyJO48ovItG4I3EinbjNcQjdasgntTEc+I4vKRYuQ/5+uuv9au//3z//ffy77//ai/qhx9+mE4/PKMm5AF68bj/6aeflty5cwviWP/55x+56qqrHEMivjQYgUd54cKFWgU81ojpNfL444/rOcLzOnfuXHNYvz7yyCNSvHhxvZ8tWza57bbb9D4yJyCelkICJEACJEACJEAC0UogIoardVHWDTfcoNkoT6Dj8f8333zjEy8Yhc2bN9dtXRUQcFZi2sL49DWm06oDxiLkyiuv1LGr1nOu9hctWiQdOnTQi62SkpL04izTDnMIRlC0AYIQhapVq6ZTBYO9Zs2a+pg3A9m6MAsL0igkQAIZl4DyZggWavq6mdAnu4ngRzXSAAYjWA8RyPd4MGPGWl84TFBgx4Tlxdr8OV8ScEUgIoarWZSFuMz69evreWEVPRYuQUyKLP3Gyz9YlARRj969tBQdL2oaBWI4GmOxTJkyRo3b13HjxunFT1h8BUEs7BNPPKENTbed/DihQgp0a+R+dSUm/tQsFnPVBsewqIxCAiSQOQisXbtWPynCj1tftptuusl2MEuWLNGLZfFULFDjFYY3ssogS8uff/5p+xwzisLt27frAjuDBw/OKJfE6yABCXs6LPziN7/+kLvVeAZxL3bu3KlvCRYbrVq1Si888naP/v77b93EF2PSGoKAVfX+ihnDm5EMb4L5wke4wMCBA8Wk1JoxY4ZeHOVubPDxRczjfixAcxZ4SX777Td9GOmxKCRAAiQAAnAQ+CPO6Qb96euuLbyAEIQnwXBF2Je/ghLhxvmA72Pr3xF/dbE9CZBAbBEIu8d1zpw5Og8rMKFUKB73mM362Np4ZT3hRMiB8WhWqlTJU1N97pdfftGvMOYQZgBBnCdELa7Sr57+gYcAglRXrgxG0xceBSMoLGCMVnPM+dX6x8RV6Vnn9nhvyqziy9u5sheyBhipWLGi2eUrCZBAJidQvXp1HcuOeHazqUWjmgoyqZhj5jXQ9QCeMHfu3FmQIQaZV8yTIU/tXZ2D4wHFYZDJ5fLLL3fVhMdIgAQyKIGwG65mURZyjuLRu/P28ssva9RY/OT8GAmPPYwgsf+9996r3yLOE+mdrIJf9db+qDxlqlNhgZIR43lFDlk8fvIkV199tWPRE8a2Gq/wFr/22mu6OxZqGTFeZLxHblVjVFoNZcwfGwRGsfG6ouiCO0GOWjCE9O/f33GtWAj29ttv6+MwbpEei0ICJEAChgC8qNbN/HC2HjP75hxiSVHIBQYtBE4DLA61fr/heHJysqBICfJF47vIWfAdC13IqQ0j2gi+66Df9MF3N568YV2B9Xsc7VNSUnRb9Mf3G+ZqBE+74AiB4HsURVnwnespfh9zWrFihSCMAoJrw98IFJjxVfB9Dh64fgj2sWjWiJkzXiEYE+kMUcrbXDOO4+8ajsOodzdn9DWscK14OolwCV/ikdEG1wou5l5iXAoJxBQB9Z/bo5jUSuqiPLbz5aTyDjpSIahwAZdd1H9AR5uffvop1ZoOC3NQXoFU9Qs7VRl6jnafffaZQ5dJh4W2SH/VsWPHVGXkOdoi/ZX64nS0HzJkiOMcdFrTUTmnoEInFbuarj3mor48HceUFyBVfTmlmjRb0HnLLbeka4O5IT2V+kJ2zEMZwg4dSE2l8q2m3nXXXY7zruaCsaALG/qo/K+OcXHMpPGCEpMOa8yYMQ6d2MH1Gh1Iy+WPMB2WP7TYlgSik4Aq2qK/A/A95k7M99n48eMd3yX43sD3HwQ6TLpB832CVxVbme57TmVecXzf4DvOiCofrY8jLaLKKe1oAx34DrWmK1Q/1NOdVwVdtBrztwJ/I1Q2lXR/I6BHORbMcPoV39MqjCudLlyDudannnoqXXtXb5BaEKkRrdeM6zJ/n/A3DPLwww/rNph7v3790rVXxnaqyg+ear7jrbrU2ohUZUinG9p8l+PVjGP6qFze6dI0Gq5oh+sx7fCKvxnLli1Lp5tvSCCUBOxKh+XVGrXTcH3vvfcc/3GseUydQZkvDlU8IJ3hqh7xO/rjPx7aTZ8+PV13Y7jiP6r1PzX2YRziS8Iq6heo40sFOpWn1nHafJGox1GOY9iBwWjOWb8IHnzwwVSV3UC3Vb/etfFpzpsvDhXX6/iCVxW4dFv8gzywKl1WuuszfxRw3oznPBfkpzX5XM1Y+OLEF5ZV2rdvr3VbjXycx5ei6afixqxdvO5jLuiLL3sKCZBAbBLwx3A13xWqimEqvo9VikB90eYPEr4LzL5pq56yOcDgO9h8v7syXE0fOBuMgYZj+P40P6xHjBiRajWAnQ1XowNzQV5w8x6vKlTNMRe1WNZxDvpg8Jq2+L5V3kxHW1c7+NthnQe+v2EMGh2q0IyjmzFczTl8H6MvjFiIehqo++E68QMCczdt4fiwipULOOHvpPn7gD64DhjxEGO4Gl1oj78r5j3GsTpyrONwnwTsJmD9bghGd1gN10Aman5F4z+aeryhDU8Yhe7+sxnDtVevXqnqUUiqCi/Qm/mP7G4O+JJSYQu6GIK7Ns7HMQf1KCtVLfpyfFFY22BMjK8ep6U7D0+rimVN54kw/fALHsattXiCOefpVT0+SlUVrfzu50knz5EACWR8Av4aritXrrwICoqvqAp8ju85FD7BEyB8b8NQtQq8kDjuznBVWWUczVX8vsPIwtMuI/ihb4wvV4YrnlbBsIRgLsYovf/++/Ux9DH9cf1GjLcXBqR6hG8Ou3w1P9yhR61r0G3w3Q6jHsfgiTViNVytTxvN3yV/CtYYwxVGr+mPceANN9f07bff6qGthqv1B4QKL3C0RT8KCYSDgF2G63/BQeoTHwuC9CdYJOVLUD9in8qWLas3E6vl7hqRwF/9WtbFENy1cT6OOSDOSv1qdblaF2NifCwGs46PBWFYTGYWhln1IltA5cqV0xU3sJ53t4/FZsjnai264K4tj5MACZBAIARU6JXLbC/Ke6jTGeJ7DrGhWDSKstMQZI/xNZ4S36V33323Y2rly5cXrC2AWNc4OBq42UHcv8lWkD9/fkeqRfVETPdA/KyR22+/3ezq
EuF4g1hZ9RjdcdzVDmJFIcoAl0aNGul9fKejqA4E+cid43Nx3JzHvvm7EEjBmnr16jn6Q5cylEUZzdjVhXz0zoV/wFUZ845DmLNZI+HrgmBHZ+6QQIQJxJzhGmFeHJ4ESIAEMi0B5UV1ee3K86crEiItFX7Q48e3ta11IatLBR4OlipVSp/FwqVABYYbxFX+a+tCWSx6MmJdNGWOWV/NvJArG9dvxDpPY5iaczCS3WWZsaNgDRarQXwxRk1BHus1m3nylQSimQAN12i+O5wbCZAACUQRgcTExItmox4xykMPPaSLrCALAIzE3r17yz333HNR20AOGM9pIH1NH+dCK3g6ZYzZd955R6/yx4p7FT9runjNDWuMRHiXVZiENl5hBH/66adaB847P1Uzxq5jkAs7dhWsKVy4sNZo8ps7j2N9b83GYD3OfRKIdgJpSUyjeJb4xYrUIvhyNAUAPE33tttuExVHIdacsJ7a8xwJkAAJkEDgBObNmydIXwhByr/u3bvrFFXwsvpTBTHwGQTWE2kDUa3x3XffFaRDRLgVjFDI8OHDJSEhwaNieJX79Omj+6sFUrrQjDWn9uuvv+6xvzmJsIRAC9YYHebVpONCVTEKCWRUAjHhcW3atKmOx3H+1ezqpsAjgNgdJt53RYfHSIAESMBeAqZKn1pVLz169EiXV9XekezVhthZtYhXK4XxCKMVsZ+zZ8/WHmRfRhswYIColfq6qTFaVSYDXRSmSZMmvqgQfwvWuFMK545KgahPmzm5a8vjJBDLBKLe4xrLcDl3EiABEsjoBMzjaSTwR6wnHkHj9dVXX3VcOuI+TTvHwQjvIJTh888/14/2sbgMgkf7KguCTzPDIjQ4SVAoABUc8UQQ143FYL48HTSDOBeswYI0iLuCNaafWWiG94jHBW+VRUCfvvPOO00zvpJAhiNAwzXD3VJeEAmQAAmEj0CHDh30YPA4IstKixYtdPUsZBMwghhQVVzAvI2K123btul5qFRVOouAmRRiX1Xifxk0aJBYjUpz3rwiJtZ4WZFBAV5bI/Dc3nzzzaJyxaZb+W/OW1+xoA3GMnjVrVtX4LFFBgVVRMHRDPxUPtd0MbOY33fffSclS5bU1bBMmAOyB5iMAQ4F3CGBDEQgJkIFMhBvXgoJkAAJRBUBXxbpmEVGrtoitR8eUavcp9r4Qrwo4lvHjh0rKmervlZrvKcrHeaYq3AwszLftIFC6745b4XqfMy8z5o1q6PZSy+9JCoZv36PlIVI9QXPKwxALNh65plnHG1d7SAGduTIkaKS/+vTMD5RehyvSLelKlXpTAs4acY1r1Z9uGZVhVCHKcD4BTeVK1f3hzELQ3ratGm6PKy1H45jHHh7MWe8V3lo08UVG06uuJq5GDZW3dwngWgmEIeks54mqEr26V+eaOOlqSc1PEcCJEACJJCBCeBxNR5fIySgUKFCjivFo3RVoU+SkpIcx6J5R1V41FkRYICuX78+oKki1nfixIlaDxZ/+SL4+6qK1WijHNkHjEEJrvAOgx9+QLRr104bsqp8t/YMwzsLz3CRIkV8GYZtSCBiBNq0aSOzZs3S4wdjTzJUIGK3kAOTAAmQQMYhAKMKK+2dBUVjsEWbTJ48WXuEUawAHlcI4lbh3YRYjW99wMU/L7/8si50oMp9O4q/IK+riTVFYQFfBYYqCtY4C7jCq+1K4El1xdxVWx4jgYxCgIZrRrmTvA4SIAESIAGfCKgS2TqjAB6xqxKs+jE9qidaV/gjXZYnQeWsZ599Vjd58sknRZWV1QUOTGwvjOFHH33UkwqeIwESCIAAY1wDgMYuJEACJEACsUsA1b2+/vprad++vb4IxIoaoxVxrvC6ektnVatWLfnss88c6bDgZTVGK8qvIk2Yp8VdsUuPMyeByBKgxzWy/Dk6CZAACZBABAigUA02xJBu3rxZUlJSdMiALyECmC4qeqHwADZ4cDdt2iTw2sLTmjdv3pBdERaOYfGbu/CBkA1MxSQQJQRouEbJjeA0SIAESIAEwk8AMaQoARuMwIOLFFjhkGrVqoVjGI5BAlFLgKECUXtrODESIAESIAESIAESIAErARquVhrcJwESIAESIAESIAESiFoCNFyj9tZwYiRAAiRAAiRAAiRAAlYCNFytNLhPAiRAAiRAAiRAAiQQtQRouEbtreHESIAESIAESIAESIAErARouFppcJ8ESIAESIAESIAESCBqCdBwjdpbw4mRAAmQAAmQAAmQAAlYCdBwtdLgPgmQAAmQAAmQAAmQQNQSoOEatbeGEyMBEiABEiABEiABErASoOFqpcF9EiABEiABEiABEiCBqCVAwzVqbw0nRgIkQAIkQAIkQAIkYCVAw9VKg/skQAIkQAIkQAIkQAJRS4CGa9TeGk6MBEiABEiABEiABEjASoCGq5UG90mABEiABEiABEiABKKWAA3XqL01nBgJkAAJkAAJkAAJkICVQDbrG+6TAAmQAAnEDoFZd9whR7dt82nCVW6/Xar26uW27YK+fWX/qlVuz5sTdukpXLeutHjrLaP2otf1o0fLhjFjLjrufMAuPdDbZeZMZ/WO9/tXrpQF/fo53nvasUtPizfflML16rkdalKbNm7PWU+ES4+vn6Gkbt2ktvq8UUggEAI0XAOhxj4kQAIkEGECMKRg3PkqJS+7zGPTfUrfrtmzPbbBSbv0pKamehwLBnnyrFke2+CkXXq8DZRy6JBP87FTD8b0JL7wQf9w6fH1M4R503D1dGd5zhMBGq6e6PAcCZAACUQpAXji7lPGH7xcMBi8SXxSkscmRTx49qwd7dLjbTyM481Ixrzs0mO9Rlf7OQsU8Gk+rvpaj/mjB209iS980D9cerzdC8wF99WT5x9tKCTgiUCc+rXq8Wfv4MGDZdCgQVqHl6aexuE5EiABEiABEiABEiCBTEqgjQptmXXhKUow9iQXZ2XSDxAvmwRIgARIgARIgARijQAN11i7Y5wvCZAACZAACZAACWRSAoxxzaQ3npdNAiSQuQlMmzZN1q9fL6sXLpS/Vq+WzTt3yqHjx6VCyZJStVIlqdWkiVSqXl1q164t9evXz9ywePUkQAJRQ4CGa9TcCk6EBEiABMJDYOBzz8kLL70khbNlk2Jnz0pxNWxbtSWobfeOHXob99tv8m+OHHIuSxZ59vnnpX///uGZHEchARIgAQ8EaLh6gMNTJEACJJCRCGzevFm6XnmlrFeppu5QF9ZUGa3OUst64PRpGa/eP/PMMzLpu+9kwZIl1rPcJwESIIGwE2CMa9iRc0ASIAESCJ7AaZXjE3lXsflShAAZYiqpEIDzymh9RQ3f1McpXK/a3aW2JUuXSlxcnKxYscLHnmxGAiRAAvYToMfVfqbUSAIkQAIhJ4DcraZyUsOBA6XRoEFux1y0aJFOa9hZtcDmrzRWHbC9praWzZvLiVOn/FXhtn3yUXUd632rSHVfI/eVrSKhp0vVNyUxvp7baxuxtI3bc9YTdulpmHi7NErsZVWdbn/Sur6SfMx7dTRvetIp9ePNUvUZXaZ+QEGQg5h
CAoEQoOEaCDX2IQESIIEYItCjc2cpqeYbiNFqvcwn1JsHU1JkwJNPyotDhlhPBbx/6swh2XJgVsD9TcdI6MGYnsTX67JLT4WCl3majsC433Jwtsc2OOlNj1cFbEACISTAUIEQwqVqEiABEog0gfvuuUd279sn99swEfzB6KO2/w0dKhMnTrRBI1WQAAmQgH8E6HH1jxdbkwAJkEDMEEDKq5Effyw91YyROcAOqamUXKm2Rx95RDp06CC5c+cOSm2FQq3ltXbBPzaONj2AYsd12annvsazoC4gMWEGifnqSpdqbwWkg51IwA4C9LjaQZE6SIAESCAKCaxZs0aKZM8urW2eWxOlb5vK+7px40abNVNdtBLQYQYqpAOvFBKIJAEarpGkz7FJgARIIIQEls+fL8XOnLF9hESlMZvKMLBp0ybbdVMhCZAACXgiQMPVEx2eIwESIIEYJrBY5V0tEYL5I8asmFoVvv7PP4PSPmJJa3lqapzeglLEziRAApmGAA3XTHOreaEkQAKZicCRI0dko3qcHwrDFRwRMzt3xozMhJTXSgIkEAUEaLhGwU3gFEiABEjAbgLLly/XKkNluJZS2hcqjy5Lwdp956iPBEjAEwEarp7o8BwJkAAJxCiBRo0a6ZnvCtH8/1Z669SuLSNHjpT69evLDHpfQ0SaakmABKwEaLhaaXCfBEiABGKEQJF69aTLzJl6q9qr10Wzzpcvn9SsUEGSLzpjz4HdanFWl+uukz9VnGuNGjXkyiuvlCeeQIkCCgm4JoDPqfnMum7BoyTgnQANV++M2IIESIAEoo5AjgIFJLF1a73FJyW5nF/zVq0kFB7Xk2q03WpxVtWqVaVEiRLyxRdfyOeffy6fffaZ1FZe2ClTpricDw9mbgL4nJrPbOYmwasPhgAN12DosS8JkAAJRDGBhs2byx6Vx9VuMV7cKlWqOFTfcsst2vvaoEEDXZigX79+cv78ecd57pAACZCAHQRouNpBkTpIgARIIAoJNG3aVM5kzSrjbZ7bDBUmcIXy9loNVwxRpEgRGTNmjIwbN07Gjx8vNWvWlJ9//tnm0akuEgQS4+tJhYKXCV4pJBBJAiz5Gkn6HJsESIAEQkgAi6aee/55eeaZZ6S8GqexDWPNUTqWqzCBVW+/7VZbz549tdf1sccek86dO0vv3r1l2LBhkiNHDrd9gj2xU6X+2rBhg6xfv05W/7VENm7YJEWLFZU6tepLjWp1pFKlSlKxYsWgS9QGO89Y7c8yr7F65zLevOlxzXj3lFdEAiRAAg4CSFfVVC3kGuM4EvjOXtX1S7UhlrVOnToeFRVQMbijRo2Sb775Rn788Ue9gOuHH37w2CeQk8eOHZN7779LypYtqxeI9R/UW6b8PkaOJsyXP//9XoaPHig33NhDx95WrlJeRowYEcgw7EMCJBAlBOJSlXiay+DBg2XQoEG6iZemntTwHAmQAAmQQAQJxKnH+/C6Yt1/IB6Lrarfa2prptJs/a7yt/ojMC7hfUXqrPvvv197X/PkyaPr3p86c0irGjt8jpQqVUruuusun1UvWLBArr7uKslZ8Kg0vu6cFK8sEl/k4u7nz4ns2y6y6heRFZNEOnS+QiZPmn5xQx4hARIIGYE2bdrIrFmztP5g7MlAvr9CdlFUTAIkQAIkEBoCKEiwO2dOeVCp/9PPIeBlhdFaMSnJb6MVQyE1FzydEydOlKlTp2rv64QJE3S8ZIVCrWX/5rwycOBAue+++2T16tXo4lUe6v2AtGzZUmp0OiQ3vnFOKjV3bbRCUZasqkRtBZG2vUU6PS4ye85vKmwhuyxatMjrOGxAAiQQXQRouEbX/eBsSIAESMAnAsnKczFCeVGxLb3wVMxTR8S7njh1Sp566CF5VzX8Vm071eZu3f8JdW6z2t5U2xy19enTRzZthd81cOnevbvOPHDVVVfJtddeK/fcc4+gNC10Z8uWTbJkySJ33HGH1wH69HlI3n/vQ7l7lEiT67w2T9egVlt1Ld+oVF6tz0qzZs3SneOb0BLA59R8ZkM7ErVnZAJcnJWR7y6vjQRIgAScCLw6fLg0veIK6acWTE1PTpYcyvAtriLGElW7vGrbp7bdavtXbZDmKjTglxdekI4dO6YdCPLfXLlyyXvvvaf1IXygTJkycvToUTGPDtesWSOPPPKIvO1m8dfkyZNl+PD35Ra1NqxQ6cAnA8/raWWdX39LJxk/VsUQUEiABGKCAA3XmLhNnCQJkAAJ2Efg6quv1guZli1bJtgWzZ0ry1eskBW7dkkz5Zltrx7B12/cWC9oqlWrln0DWzQh2wCKFzRW41jlzJkz8s477+jjyA3rLPc+eLvU6yySWM35jP/vu/QX+fCWabp4wq233uq/AvYgARIIOwEarmFHzgFJgARIIPIE4uPjpXXr1npTK6f0hGBI/qYWPGVVuV/DIX379pXsqkACjFVnQRgBjFpU5zLy8MMPS46CR6RdH3MkuNesqjZDh0fPSp9HHpC6det6zZQQ3Gix3Xtp8mg5dHK7FMhdThol9orti+HsY5oAY1xj+vZx8iRAAiRgD4F58+bJnj171GP44fYo9KJl6NChMn/+fJdGK7qi6tadd96ZTsv3k8ZL81tT0h0L9k3FpiKla2YRXD/FPYFl/4yWaZsHCV4pJBBJAjRcI0mfY5MACZBAlBBAmhrEn8KgDLUgjnX27NmSNy+iakVyqmwHSNdlldOnT8vixYvlySef1Id37NghO7ftkcLlrK3s2c9T/KjMXzzDHmXUQgIkEFICNFxDipfKSYAESCA2CEyZMkVOqawD//zzjy4cEMpZ165dW3766ScZNrOR3K4cvK3uTNFZBlAyFoLwAWQZOHv2rDakUcQAxm7h0tkkVz77Z1aojKoGtmKZ/YqpkQRIwHYCNFxtR0qFJEACJBBbBFJSUvRjezPrcHhdzVgoGtCwu8j48eNl7969snbtWvnggw8Ei6VQDQuCogTTZ0yVgqXPmm62vhZNEtnw5w45d+6crXqpjARIwH4CNFztZ0qNJEACJBBTBBAmYNJRYeLr16+Xr7/+OiLXUK1aNW2ofvLJJ7J9+3ZBiAAqbqF4ATyjoZCi5VVM7blUWaEyK1BIgASimwAN1+i+P5wdCZAACYScwMyZM3V8q3Wg119/3fo2YvvI89qzZ0/p0KGdnEirDmv7XI4dSFOZmIhsthQSIIFoJsB0WNF8dzg3EiABEnBDoEi9etJFGZyQeFWKNRj59ddfdXyrVcfSpUvl559/FlS5igZp2eIymTRztJqK/Y/z/90sUqJ0IaHhGto7ndi6dWgHoPZMQYCGa6a4zbxIEiCBjEYgR4ECYochcODAAVm1atVFeFB+9Y033ogawxV5VvdtxyN9kSw2p5mF3nr1Q1No4SKwmfgAPq92fGYzMUJeuiLAUAF+DEiABEggExNAmIArQR7V3377TebMmePqdNiP1alTR/LkzaWNV7sHP/xPDrm0eQe71VIfCZBACAjQcA0BVKokARIggVghgIVZyKPqLPC44vhbb73lfCpi71s0bykrJtk7/N
6tIluXxEnDhg3tVZzBtCXG15MKBS8TvFJIIJIEGCoQSfocmwRIgAQiTGDy5MmCdFgoAJCQkCAnT56UBg0a6FKrKAFbvHhxWb58uT4W4anK88+9IK0umSGFSp+TxtfYM5u5o0V6P/yAtGvXzh6FGVRLl2rR8wMmgyLmZflIgIarj6DYjARIgAQyGgGkmpowYYIYA/Xo0aOSP39+eemll6RNmzZRd7lNmzaV/v2fkhdfeFkadBPJGuRfsMXfiMQdLSWv/u/NqLtWTogESMA1gSD/27tWyqMkQAIkQAJpBI6kJEuubPklR9YQlHwKEjIS/Jsk/1AVHx8v+fLlkz179gSp2bfuDUv1kgqFWvvW+EKrFwa/JJN+/l6+6LtWbhue6ldfa+ONC0RmfSwqX+0b1sPcJwESiHICNFyj/AZxeiRAArFNYOP+aTL+j16SJS6r5FQGLIzYnFkvvJr3F17/O5fwX1trG9UvZ7b4kAKB93X37t0hHcMob5TYy+z69bpi6Z+SWLq4DGn/r1z/ikhSA9+7pxwTmfiCyA6VSAHXesMNN0i/fv2kU6dOjs1VzK/vI7AlCZBAKAnQcA0lXeomARLI9ASqFemkGZxPPScnzxzUWzBQ4lQyGBjA8X/kkPhH/tWqstxbSfI81EByacPWjVGc7lyCNqBdzSOchqur8X09lvz3HnntjYHy9GMvSIOuIk2uF8lf1HPvVb+keVkLFCgoCxdOFoQebNiwQX755Re99ejRQ7Jly6YNWOSvhTFbunRpz0p51mcC60ePlg1jxuj2Jgexz53ZkAQuEKDhyo8CCZAACYSQQN4cRfXj8C0HZtkySqqcl1NnD0ncafVo/4LG/Sc2yabdm/zWrz282qN7wcOrjNuUXNtk0fo98tP60+m9vukM3/TGMYzpSMhTjw6WS5q1lwf73iGf3L1ZCqpFW4WUnVmknEixiiInj4hOn4U8rQd2ZJVDu8/JHXffLJ98NNYx3SpVqgi2vn37yqFDhxxG7NNPPy333XefNG/e3GHI1q9f39GPO/4TOLptmySrLBYUEgiGAA3XYOixLwmQAAn4QABeV7sMVx+G87nJqbNHlBF8RA7L344+5/KIbFdv5273fcESwhc8eXvdhkZoYzhBCuZWlmaA0qJFC5n32zL59ttv5ffFc2TRkgWycNwmOXM6rcJW6aSi0rhpQ2lx3RWCXLCesgcUUEUdbrrpJr1hOtOmTdOG7Oeffy4DBgyQSpUqOcIJ2rdvH+CM2Y0ESCAYAjRcg6HHviRAAiTgAwEYrr9seNKHlpFvkregyN9/+jePlLNHBZuk/ONzx+L5aso1NUZKifg6Pvdx1xALynr16qU302bZsmX6MT/SeQUqbdu2FWxvvvmmri6GkAKUwX3nnXd09gWEEpiQgkKFCgU6TEz0W5o8Wg6d3C4F1I+MQGOTY+JCOcmoJxCZ5ztRj4UTJAESIAH7CMBIK5a3un0KQ6gpjzJcTxwM4QBKdcPE2+XhZsukXIEWIRsIBQWCMVqdJ4aSs/3795d58+bJP//8I8OGDdP5b++++24pXLiwXHnllbpE7vr16527Zoj3y/4ZLdM2DxK8UkggkgRouEaSPscmARLINASqFe0UE9cKj+vxEBquXau9I9fXGi3ZsuSMCR6uJpmYmCgwWL/77jtB7tvvv/9eKlasqD2z1apV0yEJMHLnzp3rqjuPkQAJBEGAhmsQ8NiVBEiABHwlYLIL+No+Uu1guKaq9KjHD9g7gxL5asmDTeZLy7J9HIonresrI5a20ZvjYIztZM+eXbp16yYjRoyQnTt3yoIFC/T7KVOmyKWXXiolS5aUu+66Sxu5p06d8vnqNm7cKL/++qvP7dmQBDILARqumeVO8zpJgAQiSqBiocslT/bCEZ2DL4PnvRCqaafXFaEBfZotvSg0IPnoSr1oLRoXrvnCylUbZCF48cUXdZlcGJ/wvCK04JprrtHFHbp27SoffvihNnJd9TfHEE/bsWNHXcXMHOMrCZCARCiHCcmTAAmQQCYkEAteV3hcIXYZrt2qvRvzoQFpRPz/F1kIHn74Ye05PXz4sIwdO1YSEhLk2Wef1RXLmjVrpo1cLCRzFoQfQJ577jnp3r277Nu3z7kJ35NApiRAj2umvO28aBIggUgQiIU416wq10zu/MEbriXy1VahAQukRdnekUAddWPmz59fevbsKUittX//fpk+fbq0bNlSvvzyS2nUqJGOke3Tp482cpFPdpYl3ynCDmrXri14pZBAZifAdFiZ/RPA6ycBEggbgaoXqmjZMeAZleXp4C1pmk7VtUPjfzqCXaCFdEk9aoyQrFly/KeUe+kIXHHFFYIN2QnWrFmj02whPGD48OGSK1cuiYuLU7HGKthYCWJjUYa3Q4cOOnTgmWeeSafL1RssGoMnFxty22bNmlUuadla6tWrpxePIYVYuCWxdetwD8nxMiABGq4Z8KbykkiABKKTACpVVSncTjbsnxr0BM/CcL01aDUuFQRjuCI0gF5Wl1jdHoQ3FRuqdcFAvfrqq2XJkiVy7lxaEQVrR4QZ4NzHH3+s03BZz2F/6tSp0n/Ao7J6xTo5eyatkliiysSWJavI9CHf6Epi2XNklTr1qstTjz8v1113nbOKkL2H4UrjNWR4M41ihgqE8FZv375d/0LetWuXHmX+/PmCwPyzZ886Rv3mm2/0l5DjwIWdzZs3y8iRI50PB/z+5ptv1jkNjx8/7rMOrGhFHsRPP/00XZ///e9/UrVqVe0hwAmURUSOw8WLF6drxzckQAIXE6gaA2mxtOHqZ1YBFBJ4qOnvNFovvuV+HSlRooT88ccfLo1WowjfzTB0YaRa5bn/PSKo6HWu8J/S8bFzct9nInePEun0uEiHfiJ3qj8pD4xV7584JykJf8j1118vN9zSxaqC+yQQ9QRouIboFh07dkxatWolZ86c0cH4SEqN9+XLl9ePbMywr776qrz11lvmreN13Lhx8vjjj8v58+cdx4LZOXjwoPz777+OR0++6Dp9+rTu42zswpDdsGGDjBo1Sj/CgoG9evVqXXLRF71sQwKZmUBMLNBSmQX8WZzVqNQdqqDAUimb0Cwz31pbrh2xr/j74Unw3Yw4WRip+BuyZ88eKVephIz67B25aZhI+74i1duIJLgoGhZfVKTqpWnGLAzaKdN/ksLF8sumTZs8DclzJBA1BBgqEKJbgdWjf//9t/5SyZMnj37FUA888ICOXcI+jNrly5fr7aOPPhK0MzJx4kSd2Bqe18qVK5vDUfGKVbJ9+/aVokWL6lgsvEcJxCJFikTF/DgJEohmAkXyVJbE+HqCVFDRKv6ECnSvPlyal3koWi8l5uaFOFcjuXPn1im0EI+KxV0FChSQggUL6le8j4+P19W74KW9/D6RRj1MT99ea7UVwbbwq6P67wziYt3Fvt7XeJZvStmKBEJMgIZriAAvXbpUa3777bcFj+nNY3QkqUbda4i1NOCMGTOkS5e0RzbI+QeDFrJy5cqLDFeEICCYHwH8eFzkqqxhSkqKrFixQqdQueyyy7QuV//AM
MZjKXxZ1axZU/AF6E1Q3hCCsAeIeY+FAxQSIAHvBLBIK9YN15IqNAALsOhl9X6//Wnx6KOPysCBA7WhigVa3qRi1dJS60r/jVar3mY9RbKpdXSXd2osi+estZ7iPglEHQGGCoTolpQrV05rhpGJlaEIEYDgvREYjEYmTJhgdtNVS7Hm94MxCu9mUlKSNnLbtm2rDU08KjKrT6EERi3yByIRNoxh/DKfPHmyQz928Mv6jjvu0O2QIxB1tlHh5fnnn0+nK10n9Wbv3r1y66236nlcddVVOkQAX7IwxuvUqePcnO9JgARcEIj2tFjwuJ46KnLujIvJq0N2hQY0LNVLrqw4UG+uR8p8R0uXLq3Dy3wxWh/ud78kJyermNXgOcFbu/vwOhn69uDglVEDCYSQAA3XEMFFfOqPP/4o8+bN06EBY8aMERinqJhiBN5UI6h5DcMU8sMPP5jDsmjRIsf+0KFD5d1339VxTfDgzp4921ET2/RBLCtyAyJMAQbl77//rtOrOJRc2OnXr5+MHj1aYHwiLOG9996TKlWq6GTYn3zyiXNzx3uEB0AnPMkQeHzxHqEDFBIgAe8E9hz7U7YfUumJojhVFAxXiKs41+7V35Pran4iWeKypzUK4l+kzWpbcZDeglCTKbviKd27b42QFrekpcyyAwLCDZ7sO0iXrbVDH3WQQCgIMFQgFFSVTsQmmUf/GAKP03v0SB+AZAxX5O3r3bu3/Pbbb7q29aRJk3Q6lBMnTuiE00iJgmD8AQMG6NnC4DSP9L/44gttPL7++uu6ugqMW3hTYZgOGjRIt0d1Fug0yauxkAoLq+AhxXHzyx6Lx5AdAIuvUFvbnUCfEfS1vjfH+UoCJPAfgc0HZsq6fb/IerXtOfbXfyeC2MuxWX2vXPgdfEzFKR5tF4Qyp66Osq8qs0D+YmknS8bXvRAa0NSpNd9GgsDX33wp5Rsq7/fV9o1evJJIy1tFRo56T1q0aGGf4gua1qu/XRuUEwfSZebMC0f5QgL+EaDh6h8vW1svWLBAezlvvPFGbbjC82myCCBNyapVq7SxiXrXJp8fjE1jtGIytWrV0gH6MEYhWJEKcTaSs2T5z7luDGaTL1B3sPxjdFkOcZcESMAPAilnj2hDVRureyfL8TP2l+vMqjLb5U77by+nbI7SyVNAJE59ZRiPa+NSd2qjNUsc/2T48TEIadNFS+dIYg37hyhUWi3W+mmu/YqVxqPbtkmypSJYSAah0gxPgN9CEbrFeJQPzyi8soUKFdKvX331lc40gCl17NjRMTMYmqVKldLvGzdu7DhuduDdRaorhBqsW7dOH0Z8qzsxcbbIVuCcCQCeVoQDUEiABPwjsO/EJu1RXbf3F1VgYIp/naOwtcksgNCA5mUejMIZZt4pIV3W6mWb5Lpr7GdQNElk0pqd+u8TshZQSCDaCNBwjdAdwQIqiFnQBK8rHtsjDABVUxISErQ3FW2QoQC1rCH79qX33GxTv2BhtBYrVkxy5sypF1jBIMaG1CmuxCwUg9H81FNPuWrCYwEQQDLwvHnz6hhj5+74UYIsEoh9RhurwEuOSjj4gZItWzZ937FwzupZt7b3Zx8hISgPiXroKC/pSfA5Qgw1vP2IXaZ4J7Dt0HyHsRrNWQK8X8nFLQoUyS01895Ko/ViNBE/Yp6KoSKW3VIkSSRHriy6VGzr1q0d6vH5PnX2sOTKlqDTuTlOcIcEwkzgv+fHYR44sw9nvnjwqB/SqVMnBxIYDhAsloIsXLhQkKUAv36xCMtqvCJAH3LJJZfo16ZN0+LPPvjgA/3e/GOt1mWM4DfeeEOwmMsqhw4dcrw1v7atmQ0cJ7lzEQHEEMNTbiqlWRt8++23OuYYcctGcB+vvfZaQXYIGJfjx4/XBib2ce+9JSE3ejy9njx50mURCVd9fv75Z0FVtPfff9/V6aCO4fOHBYjOVdiCUhqBzmfPp8gf/06UCX/eIy/NLiUfLG4lv215OapTWwWCqXGpu6R2hdZy6hB9G4HwC3UfLNotXTW35Pwv9betQxYtl00brlalk9b1lRFLWgteKSQQSQL8VooQfZOn1Riu8LCiZjRKwJowgRw5ckiDBg0EpWKxCAoLsFBeFXlZkRYLhs9zzz2nrwAZBCAwej7//HP9inRbCLBHrOy0adP0efxTrVo17Wl97bXXdJjC/fffr48hZRaMLhiq8PyZRVfwAiNnrLMx7FDIHQcBeLpvv/12nX4sa1ZVHNyNIGYZVW/wOYDXHZ7OevXqCQxbeEe3bt3qNhG4G5VBH4anHz9cTH7eoBVaFCA38TXXXCO9evXSadgsp6J+99CpHWkLq1QIwLp9k+V86tmon3MwE7y6+vvSrMwD8kuJOwVx8JToI4CUWYf+dZOrzIbpHj94XhITE23QRBUkYD8BGq72M/VJIx4Lw6NZtmxZR/sbbrhBZw+AEWukYcOG2rhBOb57771XG7CPPfaYwNiEwDMHL5Z5tAujdM6cOdpAgAGEDW3wRYe4WrNIC541ZDp48cUXHSm60AbFEmBUwXBF7Cy8gDCOkcaLhqu5K55f8SNh2LBh8uSTT7ptiB8DMFrxw2Tu3LmOqmnIuYsE5P6ILwUp4PFECjV8BvAZqVq1arrSw/iRg8wV+FGEe+8sKPuLH0AwZBBqgs+bczssLNyyZYsOiciePbs2xBHCAq+vCY2xFtfAjzb8OItG+fvIUkGsKrIA7Dj8X0q6aJyrXXNCNS8UFCiT0ESrRKiKtUiKXeNY9cB7l3xslT50XyOuMrey8bSP741jB8/KPpUWvEg5Ty39P3fyiCij+Kzgbw+FBKKRwMV/oaJxlhlwTq7+IMDTZY0pwmWPHDlSbwbBPffcI9jgGYVh6SqOFWEDqIhlQgqcF2BBF4yOJ554QsdcwpiAwYTNWeAFRoYC1MKm+EYAP0gQO4x72aRJmhHg3BMlciHweltL/Tq38/Qei/FwD+Gttcorr7yixzdpznAO3k6rXH755dozb7wq3bp100Yn2iD2GTmIjSB2F0UnEANrBF5iePZNjDYMVizsm+W0YviFF17QY5vUcDDqjfcfBrf1h5vRHYnXVEmV9dqjCq/qL3Lw5LZITCNiYyI04BpltMbF/feUABX5Qv3/HnGTWw7Ojth1x+rAFStWlKIlEyR57WHbDdd/VZq1QsXyOkLVYpUR551xCTDGNYruLbxUpnyqt2mhypUro9XaDwarK6PV2gbGDTytroxW0w6PvI2BY47x1T2Br7/+WnvTYSwePnz4oobwfpoY5zZt2lx03tcDvhSkMLpgqM5UeRMRh4t95Azu2bOno0raRx99JOPGjTPNHa9IxYaQBgiMUPSHgYr5I7QAoQ3wqCJOF0Yr8gejIAWMUxi7OF6hQgVBjC8EYTA4j81VqWLdKEz/HD29R5b+86mMXXWdDJiRTz5d0Vl+3/l+pjNar67+gVxb8+N0RituATyuDBUI04cxgGEaNKynDNcAOnrpsn+nSN36Nb204mkSiBwBelwjx54jZ1AC+IMPI7Bz587y4IMPCopEWAUebghCOJzjYGEcmvN4
lN6nTx9rV8c+jEVfClKYDo888ojDmw/DFYv4EKIAAxbZBnDMeaEe+j777LNaxYgRI3SBC7xp166dHDlyRMdjIxcxnh7A4wqvLRb8GUEZYSNmQSCMVRM7bc6F83XX0dVpWQCUV3XrwbnhHNrlWJUKXSEo/zpn2xtyJCXtc+GyYQgOJsbXvxAacHGKPQyHzzFCRHCvPf2wDcHUqNIHAs0btZHXXp8nDbqek2IVfejgY5Nti3LLLZ3/WyzsYzc2I4GwEaDhGjbUHCgzEUApXRitWKEPr6NV8uXLp99aMziY8ygLbLyx8HS6M1xhKELwqB4GhhEYuwhVMDrMcesrwkQQbvLAAw/odp7SZKGsMARlgBGXa2Tt2jRXD+aBVF6Q7t27m9NR9bpx/3SHsbr3+PqIzg2phKoV6aSN1arqNU/2Qno++09s1t5efyZ3prjIwVvSepyq609PkSal7tZGaxyqDLgR87mC15WGqxtIETyMBbnf/vCZzB+7Ra5OW5sb9GyWqwihxCKVdbnwoJW5UBCflCQlVRw9hQSCIUDDNRh67EsCHgjgUT5ytCInKx6XG0E4CIxLxIwilMC6GA8LoHbs2KHTn5n2rl4PHFC1OJV4K0jhqi+OYdEUxJXxrE+of7BYy8S1mvbmHOKoscFwNiEG0bKY4+SZg2lZAJRXFVkA8D6SUiRPFW2owmCtXDj9jxgzr6pFVAiFClPwR87CcFXlOf2Vq2t8IM1Kpy3u9NTXhHIgzhVPByjRR+DjD77UTzC2LhUpn5bqO+BJHtmrKi++J7Jp03cB6/DWsWqvXoKNQgLBEKDhGgw99iUBDwSw6ApGHQw64yE1zVu1aqVTZg0fPtzxON6c8+UVMc4QswDP9HEuSGGOO7+aXLOmGIXzebzHin+TjQJZJZyzCJg+ScqLApk3b54ju4U+4OKfU6dOuTga/CF4UrGoCtsm5WGNtCQVbJXmWVXGasl47+5QhAtkz5pbzpw7GbKpl8rfQHtZS+f3zcJBDD0WgDLONWS3JGjFCPl5ZsAT8uYrw6TPhPNB6Rv/ZFYZOWaYYOEXhQSimYD750TRPGvOjQRihADS1iBzgLOYvLtINYbE/FbZv3+/9a3LfV8LUpjOSHFm5MyZM45MFaZghTnn/GoWj8HAtop10ZkpL4xYXqTEMgIj1RRRMI+aYdzaJVsPzpHJG5+WNxfUkdfnV5Of1j8aMaM1e5bcUqv4NWqR0yh57rJkeaDxXGlTvr9PRit4xEkWbejaxcZZT5PS90ifZkvFV6PV9Ee4AA1XQyM6X196YYhKY9dG3umRRfZs8n+OGxeIvNk1TurWbCb33PaI/wrYgwTCTIAe1zAD53CZjwDy7qK4A1bdG4HBiFRWCCdA9gE8dodnFgUMUKbVmyADhS8FKYwe5P3966+/BP1gYCIGFseqV69umrh8RWqt77//XmcLQPqrm266SRCmgBRcyB+MuSMU4r333tOFMi699FKdPg2puhDfe+edd8rgwYOlYMGCOs0WyhqjvDFiccuUKSO33Xaby3FdHTx97oQjVhU5Vo+prACRlIK5ywniVHXMqnr1FC/qyzyha82eCb409atNjxofStPS9/nVxzSm4WpIRPfr5J+my8+/fi+dO14tLVX4SJPrRLLn9DznlGMiv40QWTNVZMgbL8gT/Z7z2OG+xrM8nudJEggXARqu4SLNcTI8AZMhwBR5MBeM42PGjNHGmjmG1yFDhugYVXhdscIfGwTxpCg2geponsSXghRGHwxlU2UN8bUwJlFlzSpWb6k5XqpUKV1yGEYu5mcqviFHrQlXgDGMVFt9+/bVuV1R6Q2CMANsRjAmDPOvvvpKH0I6LW+G64GTW9OMVZ1jdbLql2rUReQVyflhqCIm1STqt2si0Gun+Bsa4GrscORydTUuj/lP4KoO3bV3/J4+PWXCE8skIemoFC4jUrS8+k5RT/9V5kPZt01k7/a0130b80pSmcqyceM3UqlSJf8HZA8SiBCBuFQlnsbGH5tBgwbpJl6aelLDcySQ6QkgrhQeLF9x8OoAAEAASURBVGthAAMFj9SxKMtbTl3T3vnVXUEKLL7CIisYwxgDj/iRk9fVHBCyAA9qL7V4At5UZ0FqJHhb0d8Y6c5tkKMW1bngYbUuOrO2Q7ovGM8mfMB6DvuoVIWKVfCqooJVJAVeVONRhUcUXtZQyodLLrUlTRdCA1BQAEEIwQgyT+zcuVN++umnYNS47asrZ6kiBBB69Nxi8vsEspPMXzRDfl88V/5atVmOHEqLnU4olEfqNqoiTRu1kuaNLte5mP1Wzg4kECABhJ6ZJ4/B2JP0uAZ4A9iNBPwlYDyUrvohRVaNGjVcnfLpmDvd1iIVGMOk4rIqhWGCSmsvvfSSPvzQQw9ZTzv28+bNK9g8CRZwmcVa7trBi2uV86ln07IAaK/qL3Lo1E7r6bDvx+csecFY7ShVsWhKxa+GS+DJDTa/LMq2Ni19ry1Txg8tk+7MFoVOSrpUe8vpCN/aQQA/QLEZufvuu+WWW25x5HI2x/lKArFIgIZrLN41zpkEbCRQs2ZN/QgfKuFhM8UCbBziIlVHUpK1RxVZAOBdPXs+5aI24TyAlf/as6oM1aQCrcI5dLqxMIdfN6YP4UjXwPImx2aRwh+mHTimsmzlv7ah9rKWyt/Q0iq43UiFCvhaBhb3LXe2Am4v0lc9BXOVU970JLd6UJr21NmLq+A5dwiXHpQkPnhKPfP3Isgb/NeifTpu3lVYjj96EuPreRnN++lkFee/60Ju6IYqDy2FBAIhQMM1EGrsQwIZiMDLL7+sH+tjcVgwXl9vSJKPrnAYq9sPqaXMEZbKhdtdMFY7CnKtRoPAEMNc9p3Y4HU6WY+L5F6d1qxwq4Zyq8oaYLdEanHWiCWtfbqU+xrNlAqF3Lf1Vc+VFQdK24qD3I6JkAZfjOBw6VmaPFqmbx7sdr7mRIWCl8mwB5N1KjuE8DiLP3rsCOWA4bpMhR9CaLg63w2+95UADVdfSbEdCWRQAr179w7ZlW3YN8WRX3X/iQBy9dg4M1Sp0lkAlFcVnk14o6JRkNN13nbvhqt17tWLdra+tW0fhivilvfu3StFixa1TS8VhYfAhHe2yPbte9waruGZBUchAXsJ0HC1lye1kUCmJnD8zD5ZfyFWFWEAKWePRpRHsbzVtZFatWhHqVToiojOxdfBYVTP2/6W1+ZpXmL/DFyvSp0aWKtnhdNwhefSF/H0eB/9fdVTsWBrj8M1LNXLo2fXdA6XHj1ORTOq69fl87fItLGf65MoJuLK4+qLHijwxtn1DHiUBEJDgIZraLhSKwlkGgJ7jv3lyK+6+cDMiF83Ho/Cawnvaol8tSI+H38ngLKwuVTc5qmzh9x2RV7WZjl6yiRp47aNHSfgcYWgCAFy74ZLPD2292cOdulplNjLn2HdtrVLD8IjPIVIYAIPdfwv/AWZRZC1xFl80ePch+9JINIEMo3huuXALJ9ilAqodDeevlx81YMb6+nXPoLilyWP8en+R5uehom3e/wF7kvsFS7cmx6f4LBRRAjAQDU
Lq2C4RlJyZM3niFWFsZovR7FITseWsaspD/HKXeNc6IrTC7CQ7ir531kuztt7CCVfkZmC1bPs5RpqbShusn379nTDbN26Nd17viGBWCWQaQzXzQdn+RzM7slw9VUPPhDeDM5pmwf59LmJNj3waHl6dOTrdXnT4xMcNgoLgZSzR5ShOtmRtgohAZGUQrkraK8qHqsjhVRGE1yXs+GKcq1IdYXCAuGUSC3QCuc1ZqSxpk2bpqvqOV8T8idTSCAjEMg0hmtGuFm8BhIIJwEspoJXFRsWWUVayhZorjyrHbV31c6UT5G+Llfjw3NslWYqNOBqVbo1EhKplFiRuNaMMKa7PMz79+/XC+2Qa5lCArFMINN8ghHrZEe8k116EFv0WrvUoD870aYHFxTMdZkwA28hG0GDowKXBJCmShuraoEV0ldFUrLGZdcFAOB9xJaQ67/ysZGcVzjGRgYELCZDSEYPZbAiNCBSQo9rpMj7P66rEAGrFizQ8lYgxNqe+yQQjQQyjeEajfA5p4sJmDADhBF4Ctm4uCePBEIAif91eVV4VpWxisIAkRQYp/Cq6rRVyljNmiVHJKcT0bGxwKxTlSFhDw1wvmgYrn/++afzYb6PMgIIERg2bJiglGauXLnk1KlTF82QhutFSHggBgnQcI3Bm8Ypk0AwBFBSVS+supC2CiVXIymI2UyLVe0k5VQ4ACWNwCXlHvWIIj4pyZHEPbF1a49tgzmJUIEZM2YEo4J9w0Cgbdu2kpKSInPnztXbqFGjBHGt58+f14Ys8vG6SokVhqk5hsBntuRllznec4cEAiGQKQxXVE9B1RN48eyo/hEIaPYhgUgS+PvIUu1RhXd1x+FFkZyKHhsLqoxntXAeLwkpIz7b6JwAjIBGgwaFfHLwuO7Zsyfk43CA4Alkz55dLr/8cr1Nnz5drr32WunRo4c2ZGfOnBlxw7Vqr16CjUICwRDIFIZrMIDYlwRik0Cq8qqqLADKqwpj9cDJyKbCyZujqPaqas+qegSeU6WwosQGARiuqJwFjx0X9sTGPUOYwLx58+TRRx+VVq1a6a1///6xMXnOkgS8EKDh6gUQT5NArBA4dnqPNlR1GIAyVk+fOxHRqRdXyf91FgBlqFbwUpkoohPl4B4JwHCFwOtaqlQpj215MjoIwGiFwGilkEBGI0DDNaPdUV5PpiKw+9gah7G69eCciF97xUKXp3lWlbGKcquU2Cdgyr6iCAEN19i4n4hzrVu3roSzTG9skOEsMwIBGq4Z4S7yGjIVgU37p6elrFJe1b3H10f02nNlS0iLVVWGKsIA8mQvHNH5cHD7CRiPK6tn2c82VBrhcaW3NVR0qTfSBGi4RvoOcHwS8ELg5JmDOl7VpK3C+0hKkTyVtZFaVRmrVQq3i+RUOHYYCGTJkkXgdaXhGgbYNgyBLALwuN577702aKMKEog+AjRco++ecEYkoD2pJlZ1o/KwRlqSCrTUJVaRXzUxvl6kp8Pxw0yA1bPCDDyI4eBtPXPmDD2uQTBk1+gmQMM1uu9PppsdUpZBMqNxtPXg3LT8qioEYNfR1RG999my5HLEqsJYzZ+zZETnw8EvJrB/5UpZ0K+fPlHl9ttDmmYI4QL0uF58D6LxCLyt1atXj8p45ORZs2TX7NkaW8OBA6MRH+cUAwRouAZwkzZs2CDLli2TpUuXyJJli6VFsxbStGlzqVOnjlSsyJyUASB1dMlMeXbPqFX/uryqMlTXq7RVR1VWgEhKgVzllFcV+VU76cpVWeKyRnI6HNsLgZRDhwSGAMTupO6oa4/vufXr18u6P/6QtWr7c/VqScidW2rUqyeVKlXS33WFChXS4/Of6CEQzfGt+LwuGzxYw6LhGj2fmVibCQ1XP+7YE088Ll9+PUaSd+6TnHmySLFK56VsXZHx0+fLW8PjJOV4qpQqV1S6XXWNvPrqEImPj/dDO5tmBgIHVT5VnV/1grGaKqkRvezSCY3TPKvKWC2T0CSic+Hg0UHg9ddfF9S8h8RnzSrFzp2T8mo/Tm3jhwyR3Srm9YiKo4Q8/vjjMnToUL3Pf6KDAAzX999/Pzomw1mQQAgIZArDtWGpXlKhUGspmDspIISrlafhlru6y96DO6V6x7NyaWWRsnXSvrj/U5gqyWtFbXvly+9GyvgJX8rLL74u99xzz39NuJcpCaBSlV5YpbyqqGAVSYmTLNqrisf/8KwG+n8iktfAsUNDAHlab1NVlmb8/rv0VEMgmVlxZbReJMpo/VcdXKW2N4YNk2+++EJGjh4t7dq1u6gpD4SXwMKFC+XYsWOMbw0vdo4WZgKZwnBtlNgrYKz/e3WQDOg/WNrcK9Kuq0jW7O5VJapvemx1O52XacOP6FWdY78cLbNnznffiWcyHIHzqWfTsgAoQxWhAIdO7YjoNcbnLOF4/A9jNXvW3BGdDwePPgKffPKJ3HXXXdJATe1FtXkLACim2rRVW/XUVBm7a5e0b99e7rjjDoEeSuQIwNuKcLXy5eEjp5BAxiSQKQzXQG/dDTddI7+vmCx9xovkTvBdS/ZcIp0eF2l5i8gPg5dKhYpJsmXzNt8VsGXMETiSkqyNVMSqwlg9ez4lotdQMr6Ow1gtX/CSiM6Fg0c3gTlz5mijtZeaZjM/p1patX9abb+p7dNPP5X69etLnz59/NTC5nYRwMIs5m+1iyb1RCsBGq5u7swX6vHX+HHfyZNT3DTw4XBCCZHbPjgtQ9pvl6/Hfy03XH+DD73YJFYIJB9doapWTdZhANsORd6rXrlwW4exWjRvlVjByHlGmECPLl20weqv0Wqd9uXqDaK1H374YenWrZuULVvWepr7YSIAj+trr70WptE4DAlEhgANVxfcDxw4IA/0vkcaXe3iZACHrn9FpOcNPaVVy1ZRmaIkgEvKtF027J+iS6wiZnXfiU0R5ZA7e0FtqOosAKoYQO5sBSI6Hw4eewTuuvFGOXHkiPSyYepXKB2Ie+2nEt9P+PVXGzRShT8Eli9fLvjbdcklfMLiDze2jT0CNFxd3LP7H7pLchc+KZff7+JkAIeSVOBYAxUf+3j/3jLus4kBaMg8XaZvTkuVUiB3OQkmNtkuYsfP7FOpqibrx/8IAUg5e8Qu1QHpKZq3WpqxqgzVSoVgKlBIIDACU6dOlU+++kquD6y7y17XqaMvTZkiw4cPl969e7tsw4OhIQBva5kyZaRq1aqhGYBaSSBKCNBwdboRmzdvlm+++l6utjk3chP11+HTu3+SDc9tkCpV+BjXCbvj7bTNg/Q+ChFEynDdc+yvtCwAylDdfGCmY26R2ilf8FKHsVoiX+1ITYPjZjAC34wdKzXUNeExv11SRinqrLaPVDomGq52UfVND+NbfePEVrFPgIar0z1E6qts2eOkbD1782vmL6pW6paJ04ULaLg6QY+Ct5sPzHIYq3uO/RnRGeXImjctVlV5VREGkC8H1nBTSCA9gZwFCjgKD8QnJaU/6cO7JfPnSwUf2vnbpLjqMH3TJjmnUmllVXlgKeEhAI/rgAEDwjNYgKPgc2p3sYwAp8JuMUwgUxiuk9
b1leRjqyQxX13pUu0tj7dr6YoFUrJ6qiow4LFZQCcLlDojc36fIjequDJKZAmknD2algVAeVURAnD89L6ITqhQ7vLpjNWIToaDxwSBwqqCVddZswKaK3J9rtqyRS4LqLfnTqXU6VNnzujKWyg9Sgk9gT9UZTOU5I32+NaqvXqFtDRx6ElzhGggkCkM1+SjK2XLwdlq2at3L+q8+XOkVIi+a4uUE+1xjYYbnxnnsF8tpkLVKiysWr8v8otHyiY0U8UA0sqrls7fMDPeEl5zhAjgyRIkFB7XRKU3p6quhXKxNFw15pD/A29rsWLFpHZthhKFHDYHiDiBTGG4+kN5+ZI/pN1j/vTwvW2xiiKTv43sSnTfZ5sxWm4/tMCxsCr5yIqIXlSWuGyOWFVUriqQCxGBFBIIP4FFixZJ+Tx5JNeJEyEZvIQKEUBBgw8++EAqV66sN4RIYb9SpUohGTMzKU1OThZkEVii7uMilYd3jnpNVY6Zbqp6WcvLL5d6DRpI3bp1pXhxBG5QSCBjEaDh6nQ/SyQWkaP7djgdteft8YMiRYszZZE9NF1rQeL/9cqrisf/61QxgCMp/7huGKaj+XOW0l5VxKpiy5olR5hG5jAk4J5A6dKl5YB6nB8qOaTiWzupalqFCxeWjRs3yi+//CJbt27Vw2XLli2dIWs1bEuVQqABxROBIa++Kk/176+bgBYy5iIzRBa1bZk2Td5Tm/kL1l+1e/nll9UZCglkHAI0XJ3uZeOmDWX12h1Sv4vTCRve/rtZpHa9ajZoogorgUOndqYtrLpQtQolVyMpifnrp3lWlaFarkCLSE6FY5OASwINlEfusDJcd6mzJV22CPzgMdX18Pnz8sADD6SLuUxJSdFGLAzZDRs26P3FixcLir3sUmVjIfHx8Rd5aI1hCyM4M8uePXvkJuVRna3CPHoqEAgKcCbS9AIg5SORpWp77ZVX5Icvv5Qfpk+np/sCG77EPgEark738JLmbeXXF79XR73Hwzp19fr2wN8iPa6/xGs7NvBO4O8jS9M8q8pY3XF4ofcOIW5RtUgHweN/eFUL51ExIRQSiGICqGdfPH9+2aKKD9htuKqvOS3O+URz5swptWrV0pszmkOHDjmMWmPY/qqKGLz77ruCcxDEcBoj1rya8IM8KuwhI8snn3yiQy9USnB5UW2FvFxsQXW+rdqwXGPs9u2a2zvvvMNyvF648XRsEKDh6nSf6tapKweTU2XfdhEsprJTDv2TXVo2vdxOlZlKFx7/r7/gVT1wMu2xY6QA5M1RRBupxljNmS0+UlPhuCQQEIGGKivBFhUf2TKg3u477VanEosW1Yam+1bpzxRQqb0aN26st/RnRK+WNx5aGLXYxo0bp1/hxYWgxCyMWWPIWg3buLg4Z5Ux9X7y5MnaaO2lZu1vWd7Sqs/TavtNbSjHe9lll0mdOnXUOwoJxC4BGq5O9w7/qYsnFpbVk/fbVjkLQ6ydpVbaZssnDRty9bgTcrdvj53ek5YF4IKxevrccbdtw3GieL6aDmO1YqHW4RiSY5CAWwJHt22TDWPG6PPIjZnYurXbtq5ONFGLeF5TuVwvUfGoSa4aBHhsTe7c0rRVqwB7X9ytRIkSgu3SSy+96CTiZo2H1hi101SM5yaVRxaSRWU3MEass2ELYzcW5Jpu3aSjmqi/Rqv12uAuQQDVZc2by8HjkfsexWf2mPIAQ5jPVWPgPwEQoOHqBC1fvnzyzFPPyyOPPCJV1Hdv6VpODQJ4e0Y5BWaNyCKvv/aKjuEKQEWm6dKvxWq9qAre1RdnlYj4dVcs1MZhrBbPhzpDFBKIDgIwApYOGqQn03DgQL8N14Gqz0RVPet7ZeT1temSflV6NitD+IehQ23S6FlN+fLlBVs7FftplTMqftcYssawXbFihYwfP17++SdtwSbCC1x5aGHgFlUeY7ulU6dO8swzz0grP4z6zpdcIonqWrrZMBkQWq6ySDx0223y3mef2aDRfxXrR4+WZYMH6473+ZCe0v8R2CMzEKDh6uIu45HKF+M/lunvr5Fe77to4OehmSNEqlarKvfefZ+fPTNX8zPnTshbC+qq6GL744t9JZkzW35tqOosACrHap7szssffNXEdiQQ/QRGKMO1WbNmMkdN9WJ/pn/zhx8NqwO+++orQQxtJCV79uxSo0YNvTnP4+jRo47FYca4nTFjhnz44Ydy4MAB3RwLwZw9tOY9nBuByMqVK/VitVtuuUWw2h/z8ySffvqpzFVpruz6UYGxeqjtjc8/l2Zt28qtt97qaXieI4GoJZApDNeGpXpJBfVot2DuJJ9vxOQfZ+lULhvmifa8+tzRqeGWJSIrfxZZsiQyv3CdphPVb7NnzSNVlbG4bq8CFkYpnKdSmrGqxq5SuH0YR+ZQJBBZAk2bNpXnn3tOXvjf/wQLfwIzyUT2q76vqO267t3l6quvjuxFeRkdmQsQsuUqbOvff/+9KPTg22+/1YbuyZMntWakEoMRawxZs49XpPpyJehrMid88803Mlb9YICDBAYswiBcyS/KO9xWeVtR0MEuqaIU1VfbJHVNNFztoko94Sbg+n9ZuGcR4vEaJfbye4RChQrJe++9Jw899JA0v1HkEv9VyET1RGTjgrShsfAAj4rMhsdblIsJwNMZDsM1qUDLtCwAylhNjK938UR4hAQyCYHBL74oy5culcfVKv7b1DX7m8Dtc9VnvtqKKi/l+IkTY5oaMhdga9ny4iVr21VspvHQmsVis2bN0seQ/B9iNWKthu0Rlb3BiFlQNnLkSF2g4dlnn9UGbI4cOUwT/bp82TK5Kt0Re95gzfGShZHPxGLP1VBLZiSQKQzXQG/sgw8+qGOnul7TVkbfv1Na36sWMcAt4UX+mikye1ScpJ7KJZ9++r5cd911OgH3zz//LINVfE/v3r2lUaNG2oi96qqrpEmTJl40Zp7TMFxDIdmy5EyLVVWGKsbIn9NOP0YoZkydJBA+ApPUyvWpU6dKe1U0YJUatp3avD3sX6vawEzdobbeKmfru+/bEFeldEWrlCtXTrBdeeWV6aZ4XuWsNYasMWzXrFkj3333nezcuVO3RegCshsYAxcHT506pc+hQADSfiHmuE+fPvoYcrZu2bvXVm+rVqz+qaC275VnGdW3EhP5PWi48DV2CNBw9XKvUJ7wr1Vb5c13hkr/p5+V8g2ySP7EFJ0qq5j6BkAZ133b1KYCvPaq14M7ssuG38/KvfffIR8MH+XQDuMVGwS/0lFJZsKECfLCCy/oL0PjiYUhG+vpWxwXHcAOwjmK5Kki+05sCKB3+i4FcpVNZ6xmicuavgHfkQAJOAhggdPu3bulzx13yFBlyJZThlZZ5UmEoVNFbVnUtkVtWy+8Yr+lSqn1lTJYm6vV6plVkLmgWrVqenNmcFyt4IcxO1QtVsP3vfG2WtudPn1a9u/frxcEI9fqoEGDpEiRIpJflc0tqBa62S24l9nUvUXJWBqudtOlvnAQoOHqI+V+Dz8hl1/WTubOnSuzFvwsKyatkl/W73L0rlyzlDRu0kBuu7mTNHm7i
aAyjTtp3bq1YBsyZIj89ddfAk8sDFnU9c6tUsnAiIUBi9fMWGsai7QClaJ5q0ndEtfrMICyCaaOTKDa2I8EMhcBfN+Mv/CjetmSJbJALVr6Ye1aOXQhhVLBvHmloVpUdFObNtJALeqK9njWSN+9vIpXPWXc58qVS86eRUIq14Lz8MAijdfTTz8t+VVxiARlEEsIDFfMoIrKqLBMhSJ07tzZ9YR4lASimAANVz9uTt26dQUbHvVDsDoV//nx2D/QlaZm5esTTzwhe9WjIRiw2BC4f+edd+rchcYbW7s2ivxlbPlhbR85nPJ3wBe59/g6OXZ6r/LaVgpYBzuSQGYncM011wg2I3gUnlV5ACOdLcDMJ9ZeETpw7oIRaoxUXAM8qwgVwyI5/B3BgjH8eJgyZYpcrxwXoZINKi2Wq8VpoRqPeknATgI0XIOgidWp8JzaJcgdePvtt+sNOo0RO2LECP0rHI+jjCf2cpU8PKMJCg4s2Dk86MtauPMDWbX7K2lX6UVpUeahoPVRAQlkdgLId0oJnAA8qQkJCdpYRFiFyWrgrggCvLRHVOzsQTVkwcCHddkTQVhnVQiIp6eCLjvyIAlECQEarlFyI1xNw3hacW6pWvVrQgqGDRumU3WZ8zBmYUTHuuTLUdy2Szh55qB6xNlbVu1KM2BZ6co2tFQUJQRyqjKppvpQfFJSlMyK03AmcPDgQVmk8rEiy4CvAq9reZWlIVnFvtptuCI2uZxykjC+1de7wXbRRiBOrXJMy+PhZmZYBY9gcYiXprpNNP4zaV1fST62ShLz1ZUu1d6Kxin6NSekZTHeWLxiVWuHDh0cqbb8eZz3ww8/yBVXXBFwqINfE/eh8VNT43SrfDmKqUf+//rQw7cmTUvfJ+2VBzZvDvsr4vg2A7YiARIgAd8JXKuKBMj06aL+tVVUPRypoJwd3/70k616qYwEvBFoo2LjsTgdEow9qaK/M74kH10pWw7MErxmBEFKlgdU+plJkybJsWPHBAmtS5YsKS+99JIgCwIeQw0YMEAW+pCrD4ZvrVq1ZPbs2VGFBousnmy1US206mnLvBb9PUKGzKss83e8a4s+KiEBEiCBUBLo2LOn/JYtmy7uYNc48LauUFvX66+3SyX1kEDYCWQKwzXsVMM4ILIQXHvttfLJJ5/oVDYwQNuqX+rwpCKWCjFU999/v/z4448uV7VOVAnD4cFFrO4bb7wRxpl7HipO4gQVrW6qM05ur/e9lMgX/MK0U2cPy4/rHpb3F7eUTQd+8zwBniUBEiCBCBK46667pJ5akDvOxjn8qnTd1qWL3HbbbTZqpSoSCC8BGq7h5R3y0S699FJ59dVXZfXq1TrV1iOPPKLzCHbr1k2HA/To0UNGjRqlyw8uWLBAZzIwk3rsscfk5ptv1l5ccywaXmsU6yb9WqyWDpVfUjlug//Ibj+0QD5aeoVM+OteOaoWhFFIgARIIBoJjFWP8/9QE/vdhsmtVjqwvTVmjA3aqIIEIkcgeCsgcnPnyF4IVK9eXWCMzlC5GPft2ycfffSR5MyZUx599FEdmN9TPYrCe6ug2gvSbs2ZM8d62O0+UrysVXkev//+e52XdvTo0TpEAam97JY25Z/R4QP1St5ki+rFf38kQ1X4wLztb9uij0pIgARIwE4CWED1+uuvC0zNY0Eo3q/6oq4ZwssKFrR7uVcQE2NXEgiAAA3XAKDFYpfCaoXqrbfeKuPGjZPDhw/LZFUZB1VdnCu5IAn2tm3b5LLLLpM333zT7aV++OGHUqdeDckbn1uQi/b6nj3krVH9pV//O3WIAup9J5YtKl27dtZFG9wq8vNEodwV5MbaX0iv+j9Kyfi6fva+uHnK2aMyaX1feW9Rc9m4f/rFDXiEBEiABCJIAM6HLmox1eNqDgsCmMfnqs+zauvYsSMLDgTAj12ijwDTYUXfPQnLjLCI68CBAx7HgmcWBRaQRxYVYCBYDPZg317y+agJ0vJWVdNc5cgumqQSaSchOUVagopTR9PK3+7btk9WzvpVFVH4Wfr162drDG31ol0E26ytr8qUTc/L+dQzmF7AsuPwQvl4WVtpXOounf81f86SAetiRxIgARKwk8CPKmRg6tSp0r59e1mlFLdTm6o27lHWqrMT1bZDbSgl26dPH4/teZIEYoUADddYuVM2zxPZBHLkyCGok+1JUF8bsbBjVFwUvLGoZ14wUeRmtY6rVE3XPXOplLJl1FoqbPW7nJPp6hkVvLffTPhK5s35XZAVwS5pXf5pnXlg6qYBsnzX2KDVLvlnlKN4wSXl+gWtjwpIIFQEjqonIxsuxCsin2ti69ahGop6o4AAvnt3794tfdVi26EqNKtk9uxS7MwZKaHmVkZtSCS468K2W537R51rqz4XMz7+WGebUaciLvjMHlOLgSEmB3HEJ8UJxBwBGq4xd8vsmTCyCTgbrShFmEfVsEYxA2wFVIJzbIiJevHFF2XatGlSW/3U7/iYf3O48kGROu1Fxj+zS5KSknSpXHclcl9r5zGtsMuBC+ZOkhtqf55mwG4eIP8cQcKXwOX0uePy0/pHHQZslcLwb1BIILoIwAhYOmiQnlTDgQNpuEbX7QnJbFCYYJz67r5RZYlZt26drFmyRNb++acs2rlTssTFSWWVRQaZCGqqMrJ4qtZFZRCIJlmv1kAsU7nhIfd5TiEfTdPmXKKMAA3XKLsh4ZgOYlyR8xXGaf78+R0bapG7EoQUIEa2fCP/jVajr5h6rtX7a7VA4KY4uaprO5n9WyDRWkab69dqRa8SbLO3DZGpKnzg7PkU1w19PLrz8GIZtay9NCp1h7Sr+IIk5CrtY082IwESIIHQEejatataP9A1dANQMwlEMYFMsTgrMb6eVCioHqWpV4romtktWrTQ2QPw2B4eVXdGK3h1ubqd5FULUa97KXh6PYekypyZvwsWd4VKLkt6Up5otUEaJtqTq3DpP5+q7ANVZM62YaGaMvWSAAmQAAmQAAn4QCBTGK4o83pf41kZotyrD/fU1ibDhg2TBXOWyeX326O2kHJatu0tuvIXYmdDJQVylZXra42ROxv8IqXzK1dxkHLm/En5ecPj8u7CxrJ+H9J4U0iABEiABEiABMJNIFMYruGGmpHGGzX6A509oHpr+66qvgq7qto4Xr766iv7lLrRVLVIR+nTbIlcVWWoZM+S200r3w//fWSpfLK8o4z/o5ccOrXD945sSQIkQAIkQAIkEDQBGq5BI8y4Cvbv3y9r/9gsJavYf415ShyVNWuX2a/YjcZLkx7X4QONEnu5aeHf4WXJY3T4wOxtQ/3ryNYkQAIkQAIkQAIBE6DhGjC6jN9xzZo1+iJLVrX/WguWEtmwYYP9ij1oxOKq62p9Knc1/FXKJDTx0NK3U1j89cuGJ+WdhQ1l3b6ffevEViRAAiRAAiRAAgETYFaBgNFl/I6rVq2S4hWzSO6E87ZfLIoW/LZjn5w8eVJy5/7vEf6Wg7P1WLmyJYRsMV2Vwu0F29ztb+jsA0h/FYz8c2S5fLq8szQoeasqXvCCID0XhQSihcDpQ4dk
AAAAAElFTkSuQmCC" + } + }, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Tutorial I: The basic functionality\n", + "\n", + "In this first part of the Neural Graphs (NGs) tutorial we will focus on a simple example: training a TaylorNet module to approximate a sine wave function. We will build a simple \"model graph\" and show how we can nest it into other graphs.\n", + "\n", + "![neural_graphs_nesting.png](attachment:neural_graphs_nesting.png)\n", + "\n", + "#### This part covers the following:\n", + " * how to create a Neural Graph object\n", + " * how to activate/deactivate graph context (in various ways)\n", + " * how to bind NG inputs and outputs (in various ways)\n", + " * how to nest one graph (representing our \"trainable model\") into training and validation graphs\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Instantiate the necessary neural modules.\n", + "dl_training = RealFunctionDataLayer(n=10000, batch_size=32)\n", + "dl_validation = RealFunctionDataLayer(n=10000, batch_size=32)\n", + "tn = TaylorNet(dim=4)\n", + "loss = MSELoss()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Build the \"model\" graph.\n", + "simple_model = NeuralGraph(operation_mode=OperationMode.both)\n", + "\n", + "# Activate the \"graph context\".\n", + "simple_model.activate() \n", + "\n", + "# Create a bound input port by copying the definition from input port \"x\" of TaylorNet.\n", + "simple_model.inputs[\"input\"] = tn.input_ports[\"x\"]\n", + "# Bind the \"x\" input, so that \"x\" of the graph will \"lead\" to input port \"x\" of TaylorNet.\n", + "_ = tn(x=simple_model.inputs[\"input\"])\n", + "# Add the module a second time, also binding the port.\n", + "_ = tn(x=simple_model.inputs[\"input\"])\n", + "# All outputs will be bound by default.\n", + "\n", + "# Deactivate the graph context.\n", + "simple_model.deactivate()\n", + "\n", + "# Let us see what the graph looks like.\n", + "logging.info(simple_model.summary())\n", + "# Please note that the graph is NOT COMPLETE, as it:\n", + "# * doesn't contain a DataLayer, and\n", + "# * has bound input ports that need to be connected."
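The cells above and the training/validation cells below rely on objects (RealFunctionDataLayer, TaylorNet, MSELoss, NeuralGraph, OperationMode, logging, and the factory `nf` used later) that are created in an earlier cell of the notebook. A minimal sketch of that setup, assuming the NeMo 0.1x module layout (import paths may differ in other releases):

```python
# Sketch of the assumed setup cell; adjust the import paths to your NeMo version.
import torch
import nemo
from nemo import logging
from nemo.backends.pytorch.tutorials import MSELoss, RealFunctionDataLayer, TaylorNet
from nemo.core import EvaluatorCallback, NeuralGraph, OperationMode, SimpleLossLoggerCallback

# Factory that owns the trainer; used later by nf.reset_trainer() and nf.train().
nf = nemo.core.NeuralModuleFactory()
```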
 + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# And how about a \"model graph\" with an arbitrary graph with a loop?\n", + "\n", + "# Create a new graph instance.\n", + "simple_model = NeuralGraph(operation_mode=OperationMode.both)\n", + "\n", + "# Activate the new \"graph context\" using the \"with\" statement.\n", + "with simple_model:\n", + " # Since this time we decided to stay with the original port name \"x\", we can use the \"default input binding\".\n", + " embeddings = tn(x=simple_model)\n", + " # Now create a loop and pass it back as input to the TaylorNet instance.\n", + " prediction = tn(x=embeddings)\n", + " # Moreover, we are interested only in the second output, so we must \"manually bind\" it.\n", + " simple_model.outputs[\"prediction\"] = prediction\n", + "# Ending \"with\" closes the \"graph context\".\n", + " \n", + "# Ok, let us see what the graph looks like now.\n", + "logging.info(simple_model.summary())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Anyway, for the rest of the example let's create a simple \"model graph\" wrapping just one module.\n", + "\n", + "# Create a new graph and open its context in a single line.\n", + "with NeuralGraph(operation_mode=OperationMode.both) as simple_model:\n", + " # Since this time we decided to stay with the original port name \"x\", we can use the \"default input binding\".\n", + " prediction = tn(x=simple_model)\n", + " # We are interested in the prediction output, so we \"manually bind\" it.\n", + " simple_model.outputs[\"prediction\"] = prediction\n", + " \n", + "# Ok, let us see what the graph looks like now.\n", + "logging.info(simple_model.summary())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Let us now compose a COMPLETE training graph.\n", + "# In particular, we will \"nest\" our \"model graph\" into this new training graph.\n", + "with NeuralGraph(operation_mode=OperationMode.training) as training_graph:\n", + " # Take outputs from the training DL.\n", + " x, t = dl_training()\n", + " # Pass them to the \"inner\" graph (nest!).\n", + " p = simple_model(x=x)\n", + " # Pass both of them to the loss.\n", + " lss = loss(predictions=p, target=t)\n", + " # We will use \"loss\" as output during training, so we must \"manually bind\" it.\n", + " training_graph.outputs[\"loss\"] = lss\n", + " \n", + "# Ok, let us see what the graph looks like now.\n", + "logging.info(training_graph.summary())\n", + "# In the following please note that:\n", + "# * during nesting the graph was flattened - 3 modules, 3 steps\n", + "# * the input passed to the \"simple_model\" bound input port was passed to the actual input of TaylorNet\n", + "# * the graph is COMPLETE, i.e. there are no bound inputs left to connect and there is a single datalayer\n", + "# So in short: we can execute it!"
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Let us compose a COMPLETE validation graph.\n", + "with NeuralGraph(operation_mode=OperationMode.evaluation) as validation_graph:\n", + " # Take outputs from the training DL.\n", + " x_valid, t_valid = dl_validation()\n", + " # Pass them to the trainable module.\n", + " p_valid = simple_model(x=x_valid)\n", + " loss_valid = loss(predictions=p_valid, target=t_valid)\n", + "\n", + "# Ok, let us see what the graph looks like now.\n", + "logging.info(validation_graph.summary())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create training callback logging loss to console.\n", + "train_callback = SimpleLossLoggerCallback(\n", + " tensors=[lss], print_func=lambda x: logging.info(f'Train Loss: {str(x[0].item())}')\n", + ")\n", + "\n", + "# Create evaluator callback logging/aggregating the validation loss to console.\n", + "def batch_loss_per_batch_callback(tensors, global_vars):\n", + " if \"batch_loss\" not in global_vars.keys():\n", + " global_vars[\"batch_loss\"] = []\n", + " for key, value in tensors.items():\n", + " if key.startswith(\"loss\"):\n", + " global_vars[\"batch_loss\"].append(torch.mean(torch.stack(value)))\n", + "\n", + "\n", + "def batch_loss_epoch_finished_callback(global_vars):\n", + " epoch_loss = torch.max(torch.tensor(global_vars[\"batch_loss\"]))\n", + " logging.info(\"Evaluation Loss: {0}\".format(epoch_loss))\n", + " return dict({\"Evaluation Loss\": epoch_loss})\n", + "\n", + "\n", + "eval_callback = EvaluatorCallback(\n", + " eval_tensors=[loss_valid],\n", + " user_iter_callback=batch_loss_per_batch_callback,\n", + " user_epochs_done_callback=batch_loss_epoch_finished_callback,\n", + " eval_step=100,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Invoke the \"train\" action.\n", + "nf.reset_trainer()\n", + "nf.train(\n", + " training_graph=training_graph,\n", + " callbacks=[train_callback, eval_callback],\n", + " optimization_params={\"num_epochs\": 3, \"lr\": 0.0003},\n", + " optimizer=\"sgd\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "nemo-env", + "language": "python", + "name": "nemo-env" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/nlp/asr_postprocessor/asr_postprocessor.py b/examples/nlp/asr_postprocessor/asr_postprocessor.py index 187529ddd2e4..40d7e0d3fa6f 100644 --- a/examples/nlp/asr_postprocessor/asr_postprocessor.py +++ b/examples/nlp/asr_postprocessor/asr_postprocessor.py @@ -60,14 +60,12 @@ parser.add_argument("--attn_score_dropout", default=0.25, type=float) parser.add_argument("--attn_layer_dropout", default=0.25, type=float) parser.add_argument("--eval_step_frequency", default=2000, type=int) -parser.add_argument("--data_dir", default="/dataset/", type=str) +parser.add_argument("--data_dir", default="../../../tests/data/pred_real", type=str) parser.add_argument("--src_lang", default="pred", type=str) parser.add_argument("--tgt_lang", default="real", type=str) 
parser.add_argument("--beam_size", default=4, type=int) parser.add_argument("--len_pen", default=0.0, type=float) -parser.add_argument( - "--restore_from", dest="restore_from", type=str, default="../../../scripts/bert-base-uncased_decoder.pt" -) +parser.add_argument("--restore_from", dest="restore_from", type=str, default="bert-base-uncased_decoder.pt") args = parser.parse_args() nf = nemo.core.NeuralModuleFactory( @@ -113,7 +111,7 @@ args.d_model, num_classes=vocab_size, num_layers=1, log_softmax=True ) -loss_fn = nemo_nlp.nm.losses.PaddedSmoothedCrossEntropyLossNM(pad_id=tokenizer.pad_id, label_smoothing=0.1) +loss_fn = nemo_nlp.nm.losses.SmoothedCrossEntropyLoss(pad_id=tokenizer.pad_id, label_smoothing=0.1) beam_search = nemo_nlp.nm.trainables.BeamSearchTranslatorNM( decoder=decoder, @@ -174,7 +172,7 @@ def create_pipeline(dataset, tokens_in_batch, clean=False, training=True): input_ids_tgt=tgt, hidden_states_src=src_hiddens, input_mask_src=src_mask, input_mask_tgt=tgt_mask ) log_softmax = t_log_softmax(hidden_states=tgt_hiddens) - loss = loss_fn(logits=log_softmax, target_ids=labels) + loss = loss_fn(logits=log_softmax, labels=labels) beam_results = None if not training: beam_results = beam_search(hidden_states_src=src_hiddens, input_mask_src=src_mask) diff --git a/scripts/get_decoder_params_from_bert.py b/examples/nlp/asr_postprocessor/get_decoder_params_from_bert.py similarity index 93% rename from scripts/get_decoder_params_from_bert.py rename to examples/nlp/asr_postprocessor/get_decoder_params_from_bert.py index df4cd7c1d2f0..272bd2ab1b36 100644 --- a/scripts/get_decoder_params_from_bert.py +++ b/examples/nlp/asr_postprocessor/get_decoder_params_from_bert.py @@ -1,4 +1,5 @@ import argparse +import os import torch from transformers import BERT_PRETRAINED_MODEL_ARCHIVE_MAP @@ -41,7 +42,7 @@ parser = argparse.ArgumentParser(description="BERT parameters to decoder") parser.add_argument("--model_name", default="bert-base-uncased", type=str) -parser.add_argument("--save_to", default="", type=str) +parser.add_argument("--save_to", default="", type=str, help="folder to save output to.") args = parser.parse_args() @@ -94,4 +95,6 @@ tmp = torch.cat((new_decoder_weights['embedding_layer.token_embedding.weight'], zeros)) new_decoder_weights['embedding_layer.token_embedding.weight'] = tmp -torch.save(new_decoder_weights, args.save_to + args.model_name + "_decoder.pt") +if not os.path.exists(args.save_to): + os.makedirs(args.save_to) +torch.save(new_decoder_weights, os.path.join(args.save_to, args.model_name + "_decoder.pt")) diff --git a/examples/nlp/biobert_notebooks/biobert_ner.ipynb b/examples/nlp/biobert_notebooks/biobert_ner.ipynb new file mode 100644 index 000000000000..97ff5d40d67b --- /dev/null +++ b/examples/nlp/biobert_notebooks/biobert_ner.ipynb @@ -0,0 +1,343 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "You can run either this notebook locally (if you have all the dependencies and a GPU) or on Google Colab.\n", + "\n", + "Instructions for setting up Colab are as follows:\n", + "1. Open a new Python 3 notebook.\n", + "2. Import this notebook from GitHub (File -> Upload Notebook -> \"GITHUB\" tab -> copy/paste GitHub URL)\n", + "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", + "4. 
Run this cell to set up dependencies.\n", + "\"\"\"\n", + "# If you're using Google Colab and not running locally, run this cell.\n", + "# !pip install wget\n", + "# !pip install git+https://github.com/NVIDIA/apex.git\n", + "# !pip install nemo_toolkit[nlp]\n", + "# !pip install unidecode\n", + "import os\n", + "import nemo\n", + "import nemo.collections.nlp as nemo_nlp\n", + "import numpy as np\n", + "import time\n", + "import errno\n", + "\n", + "from nemo.backends.pytorch.common.losses import CrossEntropyLossNM\n", + "from nemo.collections.nlp.nm.data_layers import BertTokenClassificationDataLayer\n", + "from nemo.collections.nlp.nm.trainables import TokenClassifier\n", + "from nemo.collections.nlp.callbacks.token_classification_callback import eval_epochs_done_callback, eval_iter_callback\n", + "from nemo.utils.lr_policies import get_lr_policy\n", + "from nemo import logging" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Introduction\n", + "BioBERT has the same network architecture as the original BERT, but instead of Wikipedia and BookCorpus it is pretrained on PubMed, a large biomedical text corpus, which achieves better performance in biomedical downstream tasks, such as question answering(QA), named entity recognition(NER) and relationship extraction(RE). This model was trained for 1M steps. For more information please refer to the original paper https://academic.oup.com/bioinformatics/article/36/4/1234/5566506. For details about BERT please refer to https://ngc.nvidia.com/catalog/models/nvidia:bertbaseuncasedfornemo.\n", + "\n", + "\n", + "In this notebook we're going to showcase how to train BioBERT on a biomedical named entity recognition (NER) dataset." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Download model checkpoint\n", + "Download BioBert/BioMegatron checkpoints from NGC: https://ngc.nvidia.com/catalog/models and put the encoder weights \n", + "at `./checkpoints/biobert/BERT.pt` or `./checkpoints/biomegatron/BERT.pt` and the model configuration file at `./checkpoints/biobert/bert_config.json` or `./checkpoints/biomegatron/bert_config.json`." 
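A minimal sketch of staging those files, assuming the NGC archives were unpacked under ~/Downloads/ngc (both the source location and the per-model subfolder names below are placeholders, adjust them to your actual download):

```python
import os
import shutil

# Copy the downloaded encoder weights and config into the layout the cells below expect.
# The source directory is a placeholder; point it at your actual NGC download.
downloads = os.path.expanduser("~/Downloads/ngc")
for model in ("biobert", "biomegatron"):
    target = os.path.join("checkpoints", model)
    os.makedirs(target, exist_ok=True)
    for fname in ("BERT.pt", "bert_config.json"):
        src = os.path.join(downloads, model, fname)
        if os.path.exists(src):
            shutil.copy(src, os.path.join(target, fname))
```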
 + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Set which model to use.\n", + "model_type=\"biobert\" # \"biomegatron\"\n", + "base_checkpoint_path={'biobert': './checkpoints/biobert/', 'biomegatron': './checkpoints/biomegatron/'}\n", + "pretrained_model_name={'biobert': 'bert-base-cased', 'biomegatron': 'megatron-bert-uncased'}\n", + "do_lower_case={'biobert': False, 'biomegatron': True}\n", + "work_dir={'biobert': 'output_ner_biobert', 'biomegatron': 'output_ner_biomegatron'}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# the checkpoints are available from NGC: https://ngc.nvidia.com/catalog/models\n", + "CHECKPOINT_ENCODER = os.path.join(base_checkpoint_path[model_type], 'BERT.pt') # Model encoder checkpoint file\n", + "CHECKPOINT_CONFIG = os.path.join(base_checkpoint_path[model_type], 'bert_config.json') # Model configuration file\n", + " \n", + "if not os.path.exists(CHECKPOINT_ENCODER):\n", + " raise OSError(errno.ENOENT, os.strerror(errno.ENOENT), CHECKPOINT_ENCODER)\n", + "\n", + "if not os.path.exists(CHECKPOINT_CONFIG):\n", + " raise OSError(errno.ENOENT, os.strerror(errno.ENOENT), CHECKPOINT_CONFIG)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Download training data\n", + "In this example we download the NER dataset NCBI-disease using token_classification/get_medical_data.py to ./datasets/ncbi-disease." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "data_dir=\"./datasets\"\n", + "dataset=\"ncbi-disease\"\n", + "!mkdir -p $data_dir\n", + "!python ../token_classification/get_medical_data.py --data_dir=$data_dir --dataset=$dataset\n", + "!python ../token_classification/import_from_iob_format.py --data_file=$data_dir/$dataset/train.tsv\n", + "!python ../token_classification/import_from_iob_format.py --data_file=$data_dir/$dataset/test.tsv\n", + "!python ../token_classification/import_from_iob_format.py --data_file=$data_dir/$dataset/dev.tsv\n", + "!ls -l $data_dir/$dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "After the previous step, you should have a ./datasets/ncbi-disease folder that contains the following files:\n", + "- labels_train.txt\n", + "- labels_dev.txt\n", + "- labels_test.txt\n", + "- text_train.txt\n", + "- text_dev.txt\n", + "- text_test.txt\n", + "\n", + "The format of the data is described in the NeMo docs."
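For orientation, each line of a text_*.txt file holds one whitespace-tokenized sentence, and the matching line of the labels_*.txt file holds one label per token (O outside, B beginning, I inside a disease mention). The pair below is made up purely to illustrate the alignment; it is not taken from the NCBI-disease corpus:

```python
# Illustrative (fabricated) sentence/label pair in the text_*.txt / labels_*.txt layout.
text_line = "Mutations in BRCA1 are linked to hereditary breast cancer ."
labels_line = "O O O O O O B I I O"
# The two files must stay aligned token-for-token, line by line.
assert len(text_line.split()) == len(labels_line.split())
```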
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Create Neural Modules" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model_checkpoint=CHECKPOINT_ENCODER # language model encoder file\n", + "model_config=CHECKPOINT_CONFIG # model configuration file\n", + "work_dir=work_dir[model_type]\n", + "train_data_text_file=f\"{data_dir}/{dataset}/text_train.txt\"\n", + "train_data_label_file=f\"{data_dir}/{dataset}/labels_train.txt\"\n", + "eval_data_text_file=f\"{data_dir}/{dataset}/text_dev.txt\"\n", + "eval_data_label_file=f\"{data_dir}/{dataset}/labels_dev.txt\"\n", + "none_label=\"O\" \n", + "num_labels=3 # this should be the same number as number of labels in the training data\n", + "fc_dropout=0.1\n", + "max_seq_length=128\n", + "batch_size=32\n", + "\n", + "nf = nemo.core.NeuralModuleFactory(\n", + " backend=nemo.core.Backend.PyTorch,\n", + " placement=nemo.core.DeviceType.GPU\n", + ")\n", + "model = nemo_nlp.nm.trainables.get_pretrained_lm_model(\n", + " config=model_config, pretrained_model_name=pretrained_model_name[model_type], checkpoint=model_checkpoint\n", + " )\n", + "tokenizer = nemo.collections.nlp.data.tokenizers.get_tokenizer(\n", + " tokenizer_name='nemobert',\n", + " pretrained_model_name=pretrained_model_name[model_type],\n", + " do_lower_case=do_lower_case[model_type]\n", + ")\n", + "hidden_size = model.hidden_size\n", + "classifier = TokenClassifier(hidden_size=hidden_size, num_classes=num_labels, dropout=fc_dropout, num_layers=1)\n", + "task_loss = CrossEntropyLossNM(logits_ndim=3)\n", + "train_data_layer = BertTokenClassificationDataLayer(\n", + " tokenizer=tokenizer,\n", + " text_file=train_data_text_file,\n", + " label_file=train_data_label_file,\n", + " pad_label=none_label,\n", + " label_ids=None,\n", + " max_seq_length=max_seq_length,\n", + " batch_size=batch_size,\n", + " shuffle=True,\n", + " use_cache=True\n", + ")\n", + "eval_data_layer = BertTokenClassificationDataLayer(\n", + " tokenizer=tokenizer,\n", + " text_file=eval_data_text_file,\n", + " label_file=eval_data_label_file,\n", + " pad_label=none_label,\n", + " label_ids=train_data_layer.dataset.label_ids,\n", + " max_seq_length=max_seq_length,\n", + " batch_size=batch_size,\n", + " shuffle=False,\n", + " use_cache=False,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Creating Neural graph" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "train_data = train_data_layer()\n", + "train_hidden_states = model(input_ids=train_data.input_ids, token_type_ids=train_data.input_type_ids, attention_mask=train_data.input_mask)\n", + "train_logits = classifier(hidden_states=train_hidden_states)\n", + "loss = task_loss(logits=train_logits, labels=train_data.labels, loss_mask=train_data.loss_mask)\n", + "# If you're training on multiple GPUs, this should be\n", + "# len(train_data_layer) // (batch_size * batches_per_step * num_gpus)\n", + "train_steps_per_epoch = len(train_data_layer) // batch_size\n", + "logging.info(f\"doing {train_steps_per_epoch} steps per epoch\")\n", + "\n", + "eval_data = eval_data_layer()\n", + "eval_hidden_states = model(input_ids=eval_data.input_ids, token_type_ids=eval_data.input_type_ids, attention_mask=eval_data.input_mask)\n", + "eval_logits = classifier(hidden_states=eval_hidden_states)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Create Callbacks\n" + ] + }, + 
{ + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "train_callback = nemo.core.SimpleLossLoggerCallback(\n", + " tensors=[loss],\n", + " print_func=lambda x: logging.info(\"Loss: {:.3f}\".format(x[0].item())),\n", + " get_tb_values=lambda x: [[\"loss\", x[0]]],\n", + " step_freq=100,\n", + " tb_writer=nf.tb_writer,\n", + ")\n", + "\n", + "# Callback to evaluate the model\n", + "eval_callback = nemo.core.EvaluatorCallback(\n", + " eval_tensors=[eval_logits, eval_data.labels, eval_data.subtokens_mask],\n", + " user_iter_callback=lambda x, y: eval_iter_callback(x, y),\n", + " user_epochs_done_callback=lambda x: eval_epochs_done_callback(x, train_data_layer.dataset.label_ids, f'{nf.work_dir}/graphs'),\n", + " tb_writer=nf.tb_writer,\n", + " eval_step=100\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "# Training\n", + "Training could take several minutes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "num_epochs=10\n", + "lr_warmup_proportion=0.1\n", + "lr=4e-5\n", + "weight_decay=0.01\n", + "lr_policy_fn = get_lr_policy(\"WarmupAnnealing\", total_steps=num_epochs * train_steps_per_epoch, warmup_ratio=lr_warmup_proportion\n", + ")\n", + "nf.train(\n", + " tensors_to_optimize=[loss],\n", + " callbacks=[train_callback, eval_callback],\n", + " lr_policy=lr_policy_fn,\n", + " optimizer=\"adam_w\",\n", + " optimization_params={\"num_epochs\": num_epochs, \"lr\": lr, \"weight_decay\": weight_decay},\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The result should look something like\n", + "```\n", + "[NeMo I 2020-05-22 17:13:48 token_classification_callback:82] Accuracy: 0.9882348032875798\n", + "[NeMo I 2020-05-22 17:13:48 token_classification_callback:86] F1 weighted: 98.82\n", + "[NeMo I 2020-05-22 17:13:48 token_classification_callback:86] F1 macro: 93.74\n", + "[NeMo I 2020-05-22 17:13:48 token_classification_callback:86] F1 micro: 98.82\n", + "[NeMo I 2020-05-22 17:13:49 token_classification_callback:89] precision recall f1-score support\n", + " \n", + " O (label id: 0) 0.9938 0.9957 0.9947 22092\n", + " B (label id: 1) 0.8843 0.9034 0.8938 787\n", + " I (label id: 2) 0.9505 0.8982 0.9236 1090\n", + " \n", + " accuracy 0.9882 23969\n", + " macro avg 0.9429 0.9324 0.9374 23969\n", + " weighted avg 0.9882 0.9882 0.9882 23969\n", + "```" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + }, + "pycharm": { + "stem_cell": { + "cell_type": "raw", + "metadata": { + "collapsed": false + }, + "source": [] + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/nlp/biobert_notebooks/biobert_qa.ipynb b/examples/nlp/biobert_notebooks/biobert_qa.ipynb new file mode 100644 index 000000000000..5ce919f854f6 --- /dev/null +++ b/examples/nlp/biobert_notebooks/biobert_qa.ipynb @@ -0,0 +1,562 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "You can run either this notebook locally (if you have all the dependencies and a GPU) or on Google Colab.\n", + "\n", + 
"Instructions for setting up Colab are as follows:\n", + "1. Open a new Python 3 notebook.\n", + "2. Import this notebook from GitHub (File -> Upload Notebook -> \"GITHUB\" tab -> copy/paste GitHub URL)\n", + "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", + "4. Run this cell to set up dependencies.\n", + "\"\"\"\n", + "# If you're using Google Colab and not running locally, run this cell.\n", + "# !pip install wget\n", + "# !pip install git+https://github.com/NVIDIA/apex.git\n", + "# !pip install nemo_toolkit[nlp]\n", + "# !pip install unidecode\n", + "import os\n", + "import nemo\n", + "import nemo.collections.nlp as nemo_nlp\n", + "import numpy as np\n", + "import time\n", + "import errno\n", + "import json\n", + "\n", + "from nemo.backends.pytorch.common.losses import CrossEntropyLossNM\n", + "from nemo.collections.nlp.nm.data_layers import BertQuestionAnsweringDataLayer\n", + "from nemo.collections.nlp.nm.trainables import TokenClassifier\n", + "from nemo.collections.nlp.callbacks.qa_squad_callback import eval_epochs_done_callback, eval_iter_callback\n", + "from nemo.utils.lr_policies import get_lr_policy\n", + "from nemo import logging" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Introduction\n", + "BioBERT has the same network architecture as the original BERT, but instead of Wikipedia and BookCorpus it is pretrained on PubMed, a large biomedical text corpus, which achieves better performance in biomedical downstream tasks, such as question answering(QA), named entity recognition(NER) and relationship extraction(RE). This model was trained for 1M steps. For more information please refer to the original paper https://academic.oup.com/bioinformatics/article/36/4/1234/5566506. For details about BERT please refer to https://ngc.nvidia.com/catalog/models/nvidia:bertbaseuncasedfornemo.\n", + "\n", + "BioMegatron is an in house model, using Megatron https://github.com/NVIDIA/Megatron-LM pretrained on PubMed. The accuracy is better than using BioBERT on downstream tasks\n", + "\n", + "\n", + "In this notebook we're going to showcase how to train BioBERT/BioMegatron on a biomedical question answering (QA) dataset." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Download model checkpoint\n", + "Download BioBert/BioMegatron checkpoints finetuned on SQuADv1.1 from NGC: https://ngc.nvidia.com/catalog/models. Alternatively, you can also download BioBert/BioMegatron checkpoints and do the finetuning on SQuADv1.1 locally. This will take some time. For this, follow instructions at https://ngc.nvidia.com/catalog/models/nvidia:bertbaseuncasedsquadv1. \n", + " Then, put the encoder weights at `./checkpoints/biobert/qa_squad/BERT.pt` or `./checkpoints/biomegatron/qa_squad/BERT.pt`, the model head weights at `./checkpoints/biobert/qa_squad/TokenClassifier.pt` or `./checkpoints/biomegatron/qa_squad/TokenClassifier.pt` and the model configuration file at `./checkpoints/biobert/qa_squad/bert_config.json` or `./checkpoints/biomegatron/qa_squad/bert_config.json`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Set which model to use.\n", + "model_type=\"biobert\" # \"biomegatron\"\n", + "base_checkpoint_path={'biobert': './checkpoints/biobert/qa_squad', 'biomegatron': './checkpoints/biomegatron/qa_squad'}\n", + "pretrained_model_name={'biobert': 'bert-base-cased', 'biomegatron': 'megatron-bert-uncased'}\n", + "do_lower_case={'biobert': False, 'biomegatron': True}\n", + "work_dir={'biobert': 'output_bioasq_biobert', 'biomegatron': 'output_bioasq_biomegatron'}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# the checkpoints are available from NGC: https://ngc.nvidia.com/catalog/models\n", + "CHECKPOINT_ENCODER = os.path.join(base_checkpoint_path[model_type], 'BERT.pt')\n", + "CHECKPOINT_HEAD = os.path.join(base_checkpoint_path[model_type], 'TokenClassifier.pt')\n", + "CHECKPOINT_CONFIG = os.path.join(base_checkpoint_path[model_type], 'bert_config.json')\n", + " \n", + "if not os.path.exists(CHECKPOINT_ENCODER):\n", + " raise OSError(errno.ENOENT, os.strerror(errno.ENOENT), CHECKPOINT_ENCODER)\n", + "\n", + "if not os.path.exists(CHECKPOINT_HEAD):\n", + " raise OSError(errno.ENOENT, os.strerror(errno.ENOENT), CHECKPOINT_HEAD)\n", + " \n", + "if not os.path.exists(CHECKPOINT_CONFIG):\n", + " raise OSError(errno.ENOENT, os.strerror(errno.ENOENT), CHECKPOINT_CONFIG)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Download training data\n", + "You first need to download the QA dataset BioASQ 7B to ./datasets/bioasq. Before using the files in this repository, you must first register BioASQ website and download the [BioASQ Task B](http://participants-area.bioasq.org/Tasks/A/getData/) data.\n", + "You can also download part of the data using ../question_answering/get_bioasq.py.\n", + "However the test labels for 7B need to be downloaded from the official website.\n", + "In the following we show an example for training and inference for 7B which is a superset of 6B." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "data_dir=\"./datasets\"\n", + "dataset=\"BioASQ\"\n", + "if not os.path.exists(f\"{data_dir}/{dataset}\"):\n", + " !python ../question_answering/get_bioasq.py --data_dir=$data_dir\n", + "!ls -l $data_dir/$dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "After the previous step, you should have a ./datasets/BioASQ folder that contains the following files:\n", + "\n", + "- 6B1_golden.json\n", + "- 6B2_golden.json\n", + "- 6B3_golden.json\n", + "- 6B4_golden.json\n", + "- 6B5_golden.json\n", + "- BioASQ-6b/train/Full-Abstract/BioASQ-train-factoid-6b-full-annotated.json\n", + "- BioASQ-6b/test/BioASQ-6b/test/Full-Abstract/BioASQ-test-factoid-6b-1.json\n", + "- BioASQ-6b/test/BioASQ-6b/test/Full-Abstract/BioASQ-test-factoid-6b-2.json\n", + "- BioASQ-6b/test/BioASQ-6b/test/Full-Abstract/BioASQ-test-factoid-6b-3.json\n", + "- BioASQ-6b/test/BioASQ-6b/test/Full-Abstract/BioASQ-test-factoid-6b-4.json\n", + "- BioASQ-6b/test/BioASQ-6b/test/Full-Abstract/BioASQ-test-factoid-6b-5.json\n", + "- BioASQ-7b/train/Full-Abstract/BioASQ-train-factoid-7b-full-annotated.json\n", + "- BioASQ-7b/test/BioASQ-7b/test/Full-Abstract/BioASQ-test-factoid-7b-1.json\n", + "- BioASQ-7b/test/BioASQ-7b/test/Full-Abstract/BioASQ-test-factoid-7b-2.json\n", + "- BioASQ-7b/test/BioASQ-7b/test/Full-Abstract/BioASQ-test-factoid-7b-3.json\n", + "- BioASQ-7b/test/BioASQ-7b/test/Full-Abstract/BioASQ-test-factoid-7b-4.json\n", + "- BioASQ-7b/test/BioASQ-7b/test/Full-Abstract/BioASQ-test-factoid-7b-5.json\n", + "\n", + "The format of the data described in NeMo docs." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Create Neural Modules" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model_checkpoint=CHECKPOINT_ENCODER # language model encoder file\n", + "head_checkpoint=CHECKPOINT_HEAD # language model encoder file\n", + "model_config=CHECKPOINT_CONFIG # model configuration file\n", + "work_dir=work_dir[model_type]\n", + "train_file=f\"{data_dir}/{dataset}/BioASQ-7b/train/Full-Abstract/BioASQ-train-factoid-7b-full-annotated.json\"\n", + "doc_stride=128\n", + "max_query_length=64\n", + "max_seq_length=384\n", + "batch_size=12\n", + "version_2_with_negative=False\n", + "\n", + "nf = nemo.core.NeuralModuleFactory(\n", + " backend=nemo.core.Backend.PyTorch,\n", + " placement=nemo.core.DeviceType.GPU,\n", + " log_dir=work_dir\n", + ")\n", + "model = nemo_nlp.nm.trainables.get_pretrained_lm_model(\n", + " config=model_config, pretrained_model_name=pretrained_model_name[model_type], checkpoint=model_checkpoint\n", + " )\n", + "tokenizer = nemo.collections.nlp.data.tokenizers.get_tokenizer(\n", + " tokenizer_name='nemobert',\n", + " pretrained_model_name=pretrained_model_name[model_type],\n", + " do_lower_case=do_lower_case[model_type]\n", + ")\n", + "hidden_size = model.hidden_size\n", + "qa_head = TokenClassifier(\n", + " hidden_size=hidden_size, num_classes=2, num_layers=1, log_softmax=False, name=\"TokenClassifier\"\n", + ")\n", + "qa_head.restore_from(head_checkpoint)\n", + "task_loss = nemo_nlp.nm.losses.SpanningLoss()\n", + "# create training data layer, preprocessing takes a while. 
If you want to cache preprocessed data for future reuse use --use_cache=True\n", + "# remember to delete the cache when you switch the tokenizer/model (BioBERT and BioMegatron use different tokenizers)\n", + "train_data_layer = BertQuestionAnsweringDataLayer(\n", + " mode=\"train\",\n", + " tokenizer=tokenizer,\n", + " version_2_with_negative=version_2_with_negative,\n", + " data_file=train_file,\n", + " max_query_length=max_query_length,\n", + " max_seq_length=max_seq_length,\n", + " doc_stride=doc_stride,\n", + " batch_size=batch_size,\n", + " shuffle=True,\n", + " use_cache=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Creating Neural graph" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "train_data = train_data_layer()\n", + "hidden_states = model(input_ids=train_data.input_ids, token_type_ids=train_data.input_type_ids, attention_mask=train_data.input_mask)\n", + "qa_output = qa_head(hidden_states=hidden_states)\n", + "loss = task_loss(logits=qa_output, start_positions=train_data.start_positions, end_positions=train_data.end_positions)\n", + "# If you're training on multiple GPUs, this should be\n", + "# len(train_data_layer) // (batch_size * batches_per_step * num_gpus)\n", + "train_steps_per_epoch = len(train_data_layer) // batch_size\n", + "logging.info(f\"doing {train_steps_per_epoch} steps per epoch\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Create Callbacks\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "train_callback = nemo.core.SimpleLossLoggerCallback(\n", + " tensors=[loss.loss],\n", + " print_func=lambda x: logging.info(\"Loss: {:.3f}\".format(x[0].item())),\n", + " get_tb_values=lambda x: [[\"loss\", x[0]]],\n", + " step_freq=100,\n", + " tb_writer=nf.tb_writer,\n", + ")\n", + "ckpt_callback = nemo.core.CheckpointCallback(\n", + " folder=nf.checkpoint_dir, epoch_freq=1, step_freq=-1\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Training\n", + "this may take more than an hour." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "num_epochs=5\n", + "lr=5e-6\n", + "lr_warmup_proportion=0\n", + "weight_decay=0\n", + "lr_policy_fn = get_lr_policy(\"WarmupAnnealing\", total_steps=num_epochs * train_steps_per_epoch, warmup_ratio=lr_warmup_proportion\n", + ")\n", + "nf.reset_trainer()\n", + "nf.train(\n", + " tensors_to_optimize=[loss.loss],\n", + " callbacks=[train_callback, ckpt_callback],\n", + " lr_policy=lr_policy_fn,\n", + " optimizer=\"adam_w\",\n", + " optimization_params={\"num_epochs\": num_epochs, \"lr\": lr, \"weight_decay\": weight_decay},\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Inference\n", + "Do inference on test data 7b-1 7b-2 7b-3 7b-4 7b-5. Here we only show inference with 7b-4. Rerun the following cells with all 5 test sets to get all numbers." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "test_dataset=\"7b\"\n", + "test_dataset_idx=\"4\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "test_file=f\"{data_dir}/{dataset}/BioASQ-{test_dataset}/test/Full-Abstract/BioASQ-test-factoid-{test_dataset}-{test_dataset_idx}.json\"\n", + "logging.info(f\"using test file {test_file}\")\n", + "test_data_layer = BertQuestionAnsweringDataLayer(\n", + " mode=\"test\",\n", + " tokenizer=tokenizer,\n", + " version_2_with_negative=version_2_with_negative,\n", + " data_file=test_file,\n", + " max_query_length=max_query_length,\n", + " max_seq_length=max_seq_length,\n", + " doc_stride=doc_stride,\n", + " batch_size=1,\n", + " shuffle=False,\n", + " use_cache=True\n", + ")\n", + "\n", + "# Creating Neural test graph\n", + "test_data = test_data_layer()\n", + "test_hidden_states = model(input_ids=test_data.input_ids, token_type_ids=test_data.input_type_ids, attention_mask=test_data.input_mask)\n", + "test_qa_output = qa_head(hidden_states=test_hidden_states)\n", + "test_tensors=[test_data.unique_ids, test_qa_output]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "n_best_size=20\n", + "null_score_diff_threshold=0\n", + "max_answer_length=30\n", + "logging.info(f\"work dir {work_dir}, checkpoint dir {nf.checkpoint_dir}\")\n", + "output_prediction_file=f\"{work_dir}/predictions.json\"\n", + "output_nbest_file=f\"{work_dir}/nbest.json\"\n", + "evaluated_tensors = nf.infer(\n", + " tensors=test_tensors, cache=False, offload_to_cpu=False, checkpoint_dir=nf.checkpoint_dir\n", + ")\n", + "unique_ids = []\n", + "for t in evaluated_tensors[0]:\n", + " unique_ids.extend(t.tolist())\n", + "logits = []\n", + "for t in evaluated_tensors[1]:\n", + " logits.extend(t.tolist())\n", + "start_logits, end_logits = np.split(np.asarray(logits), 2, axis=-1)\n", + "(all_predictions, all_nbest, scores_diff) = test_data_layer.dataset.get_predictions(\n", + " unique_ids=unique_ids,\n", + " start_logits=start_logits,\n", + " end_logits=end_logits,\n", + " n_best_size=n_best_size,\n", + " max_answer_length=max_answer_length,\n", + " version_2_with_negative=version_2_with_negative,\n", + " null_score_diff_threshold=null_score_diff_threshold,\n", + " do_lower_case=do_lower_case[model_type],\n", + ")\n", + "with open(output_nbest_file, \"w\") as writer:\n", + " writer.write(json.dumps(all_nbest, indent=4) + \"\\n\")\n", + "with open(output_prediction_file, \"w\") as writer:\n", + " writer.write(json.dumps(all_predictions, indent=4) + \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "./datasets/BioASQ/BioASQ-7b/test/Full-Abstract/BioASQ-test-factoid-7b-4.json\n", + " {\n", + " \"context\": \"Construction of a natural panel of 11p11.2 deletions and further delineation of the critical region involved in Potocki-Shaffer syndrome. Potocki-Shaffer syndrome (PSS) is a contiguous gene deletion syndrome that results from haploinsufficiency of at least two genes within the short arm of chromosome 11[del(11)(p11.2p12)]. 
The clinical features of PSS can include developmental delay, mental retardation, multiple exostoses, parietal foramina, enlarged anterior fontanel, minor craniofacial anomalies, ophthalmologic anomalies, and genital abnormalities in males. We constructed a natural panel of 11p11.2-p13 deletions using cell lines from 10 affected individuals, fluorescence in situ hybridization (FISH), microsatellite analyses, and array-based comparative genomic hybridization (array CGH). We then compared the deletion sizes and clinical features between affected individuals. The full spectrum of PSS manifests when deletions are at least 2.1 Mb in size, spanning from D11S1393 to D11S1385/D11S1319 (44.6-46.7 Mb from the 11p terminus) and encompassing EXT2, responsible for multiple exostoses, and ALX4, causing parietal foramina. Yet one subject with parietal foramina whose deletion does not include ALX4 indicates that ALX4 in this subject may be rendered functionally haploinsufficient by a position effect. Based on comparative deletion mapping of eight individuals with the full PSS syndrome including mental retardation and two PSS families with no mental retardation, at least one gene related to mental retardation is likely located between D11S554 and D11S1385/D11S1319, 45.6-46.7 Mb from the 11p terminus.\", \n", + " \"qas\": [\n", + " {\n", + " \"question\": \"What the chromsomal location of the gene that is deleted in Potocki-Shaffer syndrome?\", \n", + " \"id\": \"5c72b7277c78d69471000073_001\"\n" + ] + } + ], + "source": [ + "# a test question example would be \n", + "!echo $test_file\n", + "!grep -B 5 \"5c72b7277c78d69471000073_001\" $test_file" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "output_bioasq_biobert/nbest.json: \"5c72b7277c78d69471000073_001\": [\r\n", + "output_bioasq_biobert/nbest.json- {\r\n", + "output_bioasq_biobert/nbest.json- \"text\": \"p11.2p12\",\r\n", + "output_bioasq_biobert/nbest.json- \"probability\": 0.3970165418689914,\r\n", + "output_bioasq_biobert/nbest.json- \"start_logit\": [\r\n", + "output_bioasq_biobert/nbest.json- 5.539025783538818\r\n", + "output_bioasq_biobert/nbest.json- ],\r\n", + "output_bioasq_biobert/nbest.json- \"end_logit\": [\r\n", + "output_bioasq_biobert/nbest.json- 6.180495738983154\r\n", + "output_bioasq_biobert/nbest.json- ]\r\n", + "output_bioasq_biobert/nbest.json- },\r\n", + "output_bioasq_biobert/nbest.json- {\r\n", + "output_bioasq_biobert/nbest.json- \"text\": \"p11.2p12)\",\r\n", + "output_bioasq_biobert/nbest.json- \"probability\": 0.1409910996956543,\r\n", + "output_bioasq_biobert/nbest.json- \"start_logit\": [\r\n", + "output_bioasq_biobert/nbest.json- 5.539025783538818\r\n", + "output_bioasq_biobert/nbest.json- ],\r\n", + "output_bioasq_biobert/nbest.json- \"end_logit\": [\r\n", + "output_bioasq_biobert/nbest.json- 5.145214557647705\r\n", + "output_bioasq_biobert/nbest.json- ]\r\n", + "output_bioasq_biobert/nbest.json- },\r\n" + ] + } + ], + "source": [ + "# the corresponding first 2 best answers of the n-best list prediction with probabilities.\n", + "!grep -A 20 \"5c72b7277c78d69471000073_001\" $data_dir/$dataset/$prefix$suffix $output_nbest_file" + ] + }, + { + "cell_type": "code", + "execution_count": 112, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " \"exact_answer\": [\r\n", + " [\r\n", + " \"11p11.2p12\"\r\n", + " ]\r\n", + " ], \r\n", + " \"concepts\": [], \r\n", + " 
\"type\": \"factoid\", \r\n", + " \"id\": \"5c72b7277c78d69471000073\", \r\n" + ] + } + ], + "source": [ + "# the golden label can be found in this following file under \"exact_answer\". In this case, the it is equal to the prediction\n", + "prefix=test_dataset.upper()\n", + "suffix=f\"{test_dataset_idx}_golden.json\"\n", + "!grep -B 7 \"5c72b7277c78d69471000073\" $data_dir/$dataset/$prefix$suffix" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Evaluate inference output with BioASQ metrics" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if not os.path.exists('bioasq-biobert'):\n", + " print(\"clone https://github.com/dmis-lab/bioasq-biobert.git\")\n", + " !git clone https://github.com/dmis-lab/bioasq-biobert.git && cd bioasq-biobert && git fetch origin pull/12/head:fix_indentation && git checkout fix_indentation && cd ..\n", + "if not os.path.exists('Evaluation-Measures'):\n", + " print(\"clone https://github.com/BioASQ/Evaluation-Measures.git\")\n", + " !git clone https://github.com/BioASQ/Evaluation-Measures.git && git checkout cd93f3b8eb290c965d18ef466ee28a0bcf451e5d" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "transformed_nbest_dir=f\"{work_dir}/transformed_nbest\"\n", + "!mkdir -p $transformed_nbest_dir\n", + "!python bioasq-biobert/biocodes/transform_n2b_factoid.py --nbest_path=$output_nbest_file --output_path=$transformed_nbest_dir" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "prefix=test_dataset.upper()\n", + "suffix=f\"{test_dataset_idx}_golden.json\"\n", + "! java -Xmx10G -cp Evaluation-Measures/flat/BioASQEvaluation/dist/BioASQEvaluation.jar evaluation.EvaluatorTask1b -phaseB -e 5 $data_dir/$dataset/$prefix$suffix $transformed_nbest_dir/BioASQform_BioASQ-answer.json" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "With the default hyper parameters the result for 7b-4 test factoid will look something like this for BioBERT:\n", + "\n", + "```0.0 0.45161290322580644 0.6774193548387096 0.5403225806451613 0.0 0.0 0.0 0.0 0.0 0.0```\n", + "\n", + "and for BioMegatron:\n", + "\n", + "```0.0 0.6470588235 0.8235294118 0.7254901961 0.0 0.0 0.0 0.0 0.0 0.0```\n", + "\n", + "where the second, third and fourth numbers will be strict accuracy (SAcc), lenient accuracy (LAcc) and mean reciprocal rank (MRR) for factoid\n", + "questions respectively." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The class weighted average for 7B test factoid of all 5 tasks:\n", + "\n", + "| Model | SAcc | LAcc | MRR |\n", + "| :--- | :---: | :---: | :---: |\n", + "|BioMegatron | 0.39 | 0.6 | 0.47 |\n", + "|BioBERT | 0.48 | 0.64 |0.54|" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + }, + "pycharm": { + "stem_cell": { + "cell_type": "raw", + "metadata": { + "collapsed": false + }, + "source": [] + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/nlp/biobert_notebooks/biobert_re.ipynb b/examples/nlp/biobert_notebooks/biobert_re.ipynb new file mode 100644 index 000000000000..f755e1abcc3b --- /dev/null +++ b/examples/nlp/biobert_notebooks/biobert_re.ipynb @@ -0,0 +1,339 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "You can run either this notebook locally (if you have all the dependencies and a GPU) or on Google Colab.\n", + "\n", + "Instructions for setting up Colab are as follows:\n", + "1. Open a new Python 3 notebook.\n", + "2. Import this notebook from GitHub (File -> Upload Notebook -> \"GITHUB\" tab -> copy/paste GitHub URL)\n", + "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", + "4. Run this cell to set up dependencies.\n", + "\"\"\"\n", + "# If you're using Google Colab and not running locally, run this cell.\n", + "# !pip install wget\n", + "# !pip install git+https://github.com/NVIDIA/apex.git\n", + "# !pip install nemo_toolkit[nlp]\n", + "# !pip install unidecode\n", + "import os\n", + "import nemo\n", + "import nemo.collections.nlp as nemo_nlp\n", + "import numpy as np\n", + "import time\n", + "import errno\n", + "\n", + "from nemo.backends.pytorch.common.losses import CrossEntropyLossNM\n", + "from nemo.collections.nlp.data.datasets import TextClassificationDataDesc\n", + "from nemo.collections.nlp.nm.data_layers import BertTextClassificationDataLayer\n", + "from nemo.collections.nlp.nm.trainables import SequenceClassifier\n", + "from nemo.collections.nlp.callbacks.text_classification_callback import eval_epochs_done_callback, eval_iter_callback\n", + "from nemo.utils.lr_policies import get_lr_policy\n", + "from nemo import logging" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Introduction\n", + "BioBERT has the same network architecture as the original BERT, but instead of Wikipedia and BookCorpus it is pretrained on PubMed, a large biomedical text corpus, which achieves better performance in biomedical downstream tasks, such as question answering(QA), named entity recognition(NER) and relationship extraction(RE). This model was trained for 1M steps. For more information please refer to the original paper https://academic.oup.com/bioinformatics/article/36/4/1234/5566506. For details about BERT please refer to https://ngc.nvidia.com/catalog/models/nvidia:bertbaseuncasedfornemo.\n", + "\n", + "\n", + "In this notebook we're going to showcase how to train BioBERT on a biomedical relation extraction (RE) dataset." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Download model checkpoint\n", + "Download BioBert/BioMegatron checkpoints from NGC: https://ngc.nvidia.com/catalog/models and put the encoder weights \n", + "at `./checkpoints/biobert/BERT.pt` or `./checkpoints/biomegatron/BERT.pt` and the model configuration file at `./checkpoints/biobert/bert_config.json` or `./checkpoints/biomegatron/bert_config.json`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Set which model to use.\n", + "model_type=\"biobert\" # \"biomegatron\"\n", + "base_checkpoint_path={'biobert': './checkpoints/biobert/', 'biomegatron': './checkpoints/biomegatron/'}\n", + "pretrained_model_name={'biobert': 'bert-base-cased', 'biomegatron': 'megatron-bert-uncased'}\n", + "do_lower_case={'biobert': False, 'biomegatron': True}\n", + "work_dir={'biobert': 'output_re_biobert', 'biomegatron': 'output_re_biomegatron'}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# the checkpoints are available from NGC: https://ngc.nvidia.com/catalog/models\n", + "CHECKPOINT_ENCODER = os.path.join(base_checkpoint_path[model_type], 'BERT.pt') # Model encoder checkpoint file\n", + "CHECKPOINT_CONFIG = os.path.join(base_checkpoint_path[model_type], 'bert_config.json') # Model configuration file\n", + " \n", + "if not os.path.exists(CHECKPOINT_ENCODER):\n", + " raise OSError(errno.ENOENT, os.strerror(errno.ENOENT), CHECKPOINT_ENCODER)\n", + "\n", + "if not os.path.exists(CHECKPOINT_CONFIG):\n", + " raise OSError(errno.ENOENT, os.strerror(errno.ENOENT), CHECKPOINT_CONFIG)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Download training data\n", + "In this example we download the RE dataset chemprot to ./datasets/chemprot and process it with text_classification/data/import_datasets.py" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#download https://github.com/arwhirang/recursive_chemprot/blob/master/Demo/tree_LSTM/data/chemprot-data_treeLSTM.zip and extract it into ./datasets/chemprot\n", + "data_dir=\"./datasets\"\n", + "dataset=\"chemprot\"\n", + "if not os.path.exists(f\"{data_dir}/{dataset}\"):\n", + " !mkdir -p $data_dir/$dataset\n", + " !wget \"https://github.com/arwhirang/recursive_chemprot/blob/master/Demo/tree_LSTM/data/chemprot-data_treeLSTM.zip?raw=true\" -O data.zip\n", + " !unzip data.zip -d $data_dir/$dataset\n", + " !rm data.zip\n", + "\n", + "!python ../text_classification/data/import_datasets.py --source_data_dir=$data_dir/$dataset --target_data_dir=$data_dir/$dataset --dataset_name=$dataset\n", + "!ls -l $data_dir/$dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "After the previous step, you should have a ./datasets/chemprot folder that contains the following files:\n", + "- train.tsv\n", + "- test.tsv\n", + "- dev.tsv\n", + "- label_mapping.tsv\n", + "\n", + "The format of the data described in NeMo docs." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Create Neural Modules" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model_checkpoint=CHECKPOINT_ENCODER # language model encoder file\n", + "model_config=CHECKPOINT_CONFIG # model configuration file\n", + "work_dir=work_dir[model_type]\n", + "train_data_text_file=f\"{data_dir}/{dataset}/train.tsv\"\n", + "eval_data_text_file=f\"{data_dir}/{dataset}/dev.tsv\"\n", + "fc_dropout=0.1\n", + "max_seq_length=128\n", + "batch_size=32\n", + "num_output_layers=1\n", + "\n", + "nf = nemo.core.NeuralModuleFactory(\n", + " backend=nemo.core.Backend.PyTorch,\n", + " placement=nemo.core.DeviceType.GPU\n", + ")\n", + "model = nemo_nlp.nm.trainables.get_pretrained_lm_model(\n", + " config=model_config, pretrained_model_name=pretrained_model_name[model_type], checkpoint=model_checkpoint\n", + " )\n", + "tokenizer = nemo.collections.nlp.data.tokenizers.get_tokenizer(\n", + " tokenizer_name='nemobert',\n", + " pretrained_model_name=pretrained_model_name[model_type],\n", + " do_lower_case=do_lower_case[model_type]\n", + ")\n", + "hidden_size = model.hidden_size\n", + "data_desc = TextClassificationDataDesc(data_dir=f\"{data_dir}/{dataset}\", modes=['train', 'dev'])\n", + "classifier = nemo_nlp.nm.trainables.SequenceClassifier( \n", + " hidden_size=hidden_size,\n", + " num_classes=data_desc.num_labels,\n", + " dropout=fc_dropout,\n", + " num_layers=num_output_layers,\n", + " log_softmax=False,\n", + ")\n", + "task_loss = CrossEntropyLossNM(weight=None)\n", + "train_data_layer = BertTextClassificationDataLayer(\n", + " tokenizer=tokenizer,\n", + " input_file=train_data_text_file,\n", + " max_seq_length=max_seq_length,\n", + " batch_size=batch_size,\n", + " shuffle=True,\n", + " use_cache=True\n", + ")\n", + "eval_data_layer = BertTextClassificationDataLayer(\n", + " tokenizer=tokenizer,\n", + " input_file=eval_data_text_file,\n", + " max_seq_length=max_seq_length,\n", + " batch_size=batch_size,\n", + " shuffle=False,\n", + " use_cache=False\n", + ")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Creating Neural graph" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "train_data = train_data_layer()\n", + "train_hidden_states = model(input_ids=train_data.input_ids, token_type_ids=train_data.input_type_ids, attention_mask=train_data.input_mask)\n", + "train_logits = classifier(hidden_states=train_hidden_states)\n", + "loss = task_loss(logits=train_logits, labels=train_data.labels)\n", + "# If you're training on multiple GPUs, this should be\n", + "# len(train_data_layer) // (batch_size * batches_per_step * num_gpus)\n", + "train_steps_per_epoch = len(train_data_layer) // batch_size\n", + "logging.info(f\"doing {train_steps_per_epoch} steps per epoch\")\n", + "\n", + "eval_data = eval_data_layer()\n", + "eval_hidden_states = model(input_ids=eval_data.input_ids, token_type_ids=eval_data.input_type_ids, attention_mask=eval_data.input_mask)\n", + "eval_logits = classifier(hidden_states=eval_hidden_states)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Create Callbacks\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "train_callback = nemo.core.SimpleLossLoggerCallback(\n", + " tensors=[loss],\n", + " print_func=lambda x: logging.info(\"Loss: {:.3f}\".format(x[0].item())),\n", + " 
get_tb_values=lambda x: [[\"loss\", x[0]]],\n", + " step_freq=100,\n", + " tb_writer=nf.tb_writer,\n", + ")\n", + "\n", + "# Callback to evaluate the model\n", + "eval_callback = nemo.core.EvaluatorCallback(\n", + " eval_tensors=[eval_logits, eval_data.labels],\n", + " user_iter_callback=lambda x, y: eval_iter_callback(x, y, eval_data_layer),\n", + " user_epochs_done_callback=lambda x: eval_epochs_done_callback(x, f'{nf.work_dir}/graphs'),\n", + " tb_writer=nf.tb_writer,\n", + " eval_step=500,\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Training\n", + "Training could take several minutes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "num_epochs=3\n", + "lr_warmup_proportion=0.1\n", + "lr=3e-5\n", + "weight_decay=0.01\n", + "lr_policy_fn = get_lr_policy(\"WarmupAnnealing\", total_steps=num_epochs * train_steps_per_epoch, warmup_ratio=lr_warmup_proportion\n", + ")\n", + "nf.train(\n", + " tensors_to_optimize=[loss],\n", + " callbacks=[train_callback, eval_callback],\n", + " lr_policy=lr_policy_fn,\n", + " optimizer=\"adam_w\",\n", + " optimization_params={\"num_epochs\": num_epochs, \"lr\": lr, \"weight_decay\": weight_decay},\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The result should look something like this:\n", + "```\n", + "precision recall f1-score support\n", + " \n", + " 0 0.7328 0.8348 0.7805 115\n", + " 1 0.9402 0.9291 0.9346 7950\n", + " 2 0.8311 0.9146 0.8708 199\n", + " 3 0.6400 0.6302 0.6351 457\n", + " 4 0.8002 0.8317 0.8156 1093\n", + " 5 0.7228 0.7518 0.7370 548\n", + " \n", + " accuracy 0.8949 10362\n", + " macro avg 0.7778 0.8153 0.7956 10362\n", + " weighted avg 0.8963 0.8949 0.8954 10362\n", + "```" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + }, + "pycharm": { + "stem_cell": { + "cell_type": "raw", + "metadata": { + "collapsed": false + }, + "source": [] + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/nlp/dialogue_state_tracking/data/dialogue_augmentation_for_sgd_format.py b/examples/nlp/dialogue_state_tracking/data/dialogue_augmentation_for_sgd_format.py new file mode 100644 index 000000000000..52687af839f9 --- /dev/null +++ b/examples/nlp/dialogue_state_tracking/data/dialogue_augmentation_for_sgd_format.py @@ -0,0 +1,514 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================= + +import argparse +import copy +import json +import os +import random +import re +from collections import defaultdict +from pprint import pprint + +import inflect +import numpy as np +from tqdm import tqdm + +p = inflect.engine() + + +def get_ontology(dialogues, schemas): + """ + creates ontology: + (service_name, slot_name) -> + -> is_categorical -> True/False + -> possible_values -> set of values + """ + ontology = defaultdict(defaultdict) + for schema in schemas: + service_name = schema['service_name'] + for slot in schema['slots']: + slot_name = slot['name'] + ontology[(service_name, slot_name)]["is_categorical"] = slot['is_categorical'] + ontology[(service_name, slot_name)]["possible_values"] = set(slot['possible_values']) + + for dialogue in dialogues: + for turn in dialogue["turns"]: + for frame in turn["frames"]: + service_name = frame["service"] + if "state" in frame: + for k, vs in frame["state"]["slot_values"].items(): + for v in vs: + ontology[(service_name, k)]["possible_values"].add(v) + if "actions" in frame: + for action in frame["actions"]: + k = action["slot"] + for v in action["values"]: + if (service_name, k) in ontology: + # some slots like 'count' are not in schema + ontology[(service_name, k)]["possible_values"].add(v) + return ontology + + +def get_affected_future_frames(dialogue, from_turn_id, slot_name, slot_value, service): + """ + determine for all turns starting from from_turn_id if they contain the given combination of slot_name, slot_value, service + if so, return affected List[(turn_id, frame_id, slot_name)] + """ + assert isinstance(from_turn_id, int) + assert isinstance(slot_name, str) + assert isinstance(slot_value, str) + assert isinstance(service, str) + res = [] + for turn_id, turn in enumerate(dialogue["turns"][from_turn_id:], start=from_turn_id): + for frame_id, frame in enumerate(turn["frames"]): + if turn["speaker"] == "SYSTEM": + if frame["service"] == service: + for action in frame["actions"]: + if action["slot"] == slot_name and slot_value in action["values"]: + res.append((turn_id, frame_id, slot_name)) + continue + else: + if frame["service"] == service and slot_value in frame["state"]["slot_values"].get(slot_name, []): + res.append((turn_id, frame_id, slot_name)) + continue + return res + + +def augment_dialog_by_auxiliary_entries(dialogue): + """ + augments dialogue by slot_to_span and state_update. + slot_to_span (dict): slotname-> value-> [start_idx, end_idx] for all values in turn that appear exactly once in utterance. + state_update (dict): slotname-> [(turn_id, frame_id, slot_name)] only contains newly introduced slotnames. + New for system are all slots in "actions". + New for user are all slots who did not appear in previous turn or whose (list of) value has changed. + Returns list of following affected turns/frames. 
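+
+    Illustrative example (hypothetical slot names and values):
+        frame["slot_to_span"] == {"restaurant_name": {"Sala Thong": [24, 34]}}
+        frame["state_update"] == {"restaurant_name": [(5, 0, "restaurant_name")]}
+    where each (turn_id, frame_id, slot_name) triple points at a later frame that
+    contains the same value, so that a replacement made here can be propagated there.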
+ + """ + prev_service_user = "" + prev_state_slots_user = {} # key, value + for turn_id, turn in enumerate(dialogue["turns"]): + for frame in turn["frames"]: + slot_to_spans = defaultdict(dict) + for slot in frame["slots"]: + k = slot["slot"] + start_idx, end_idx = slot["start"], slot["exclusive_end"] + slot_to_spans[k][turn["utterance"][start_idx:end_idx]] = [start_idx, end_idx] + frame["slot_to_span"] = slot_to_spans + + if turn["speaker"] == "SYSTEM": + for frame in turn["frames"]: + new_slots = defaultdict(list) + for action in frame["actions"]: + slot = action["slot"] + slot_values = action["values"] + for v in slot_values: + new_slots[slot] = get_affected_future_frames( + dialogue, turn_id + 1, slot_name=slot, slot_value=v, service=frame["service"] + ) + if v in turn["utterance"]: + if slot not in frame["slot_to_span"] or v not in frame["slot_to_span"][slot]: + if len(turn["utterance"].split(v)) == 2: + start_idx = turn["utterance"].index(v) + end_idx = start_idx + len(v) + frame["slot_to_span"][slot][v] = [start_idx, end_idx] + frame["state_update"] = new_slots + else: + for frame in turn["frames"]: + new_slots = defaultdict(list) # map from slot_value -> List[frames] in future + for k, vs in frame["state"]["slot_values"].items(): + for v_id, v in enumerate(vs): + if v in turn["utterance"]: + if k not in frame["slot_to_span"] or v not in frame["slot_to_span"][k]: + if len(turn["utterance"].split(v)) == 2: + start_idx = turn["utterance"].index(v) + end_idx = start_idx + len(v) + frame["slot_to_span"][k][v] = [start_idx, end_idx] + if k not in prev_state_slots_user or v not in prev_state_slots_user[k]: + new_slots[k] = get_affected_future_frames( + dialogue, turn_id + 1, slot_name=k, slot_value=v, service=frame["service"] + ) + frame["state_update"] = new_slots + + if len(turn["frames"]) == 1: + use_frame = turn["frames"][0] + else: + use_frame = [frame for frame in turn["frames"] if frame["service"] != prev_service_user][0] + prev_service_user = use_frame["service"] + prev_state_slots_user = use_frame["state"]["slot_values"] + + +def validate(dialogue): + """ + check if dialogue is valid wrt to non categorical slots: + -check if span indices are within utterance length + -check if utterance substring (by span) is found among values in system action + -check if utterance substring (by span) is found among values in user state->slot_values->key + Otherwise raise error with turn id and frame id + """ + for turn_id, turn in enumerate(dialogue["turns"]): + for frame_id, frame in enumerate(turn["frames"]): + for slot in frame["slots"]: + try: + st_idx, end_idx, key = slot["start"], slot["exclusive_end"], slot["slot"] + word = turn["utterance"][st_idx:end_idx] + assert 0 <= st_idx < end_idx <= len(turn["utterance"]) + if turn["speaker"] == "SYSTEM": + found_key = False + for action in frame["actions"]: + if action["slot"] == key: + if word in action["values"]: + found_key = True + assert found_key + else: + if key in frame["state"]["slot_values"]: + assert word in frame["state"]["slot_values"][key] + except Exception: + raise ValueError(f"Turn {turn_id}, frame {frame_id}") + + +def process_dialogues(final_dialogues, dialogue_count, dialogues, replace_turn_prob, replace_word_prob, new_val_func): + """ + iterates through all dialogues and does replacement according to new_val_func + writes out into final_dialogues. 
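+
+    Args:
+        final_dialogues: dict mapping a dialogue file id to the list of processed dialogues.
+        dialogue_count: per-file counter used to renumber the dialogue ids.
+        dialogues: list of dialogues to augment.
+        replace_turn_prob: probability that a turn's utterance is considered for augmentation.
+        replace_word_prob: probability that an individual candidate span within a selected turn is replaced.
+        new_val_func: callable (dialogue, turn_id, old_value, start_idx, end_idx) returning the
+            replacement string, or None to leave the span unchanged.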
+ """ + replace_success = 0 + replace_failed = 0 + for dialogue_id, dialogue in tqdm(enumerate(dialogues)): + d_id, d_count = dialogue["dialogue_id"].split("_") + d_id = int(d_id) + dialogue["dialogue_id"] = f"{d_id}_{dialogue_count[d_id]:05d}" + dialogue_count[d_id] += 1 + for turn_id, turn in enumerate(dialogue["turns"]): + if random.random() < replace_turn_prob: + spans = get_sentence_components(turn=turn) + for span in reversed(spans): + if random.random() < replace_word_prob: + old_value = dialogue["turns"][turn_id]["utterance"][span[0] : span[1]] + new_value = new_val_func(dialogue, turn_id, old_value, span[0], span[1]) + if new_value: + tmp_dialogue = copy.deepcopy(dialogue) + try: + replace(tmp_dialogue, turn_id, span[0], span[1], new_value) + validate(tmp_dialogue) + for k, v in tmp_dialogue.items(): + dialogue[k] = v + replace_success += 1 + except Exception: + replace_failed += 1 + final_dialogues[d_id].append(dialogue) + print(f"Replacement success {replace_success}, failed {replace_failed}\n") + + +def update_spans(dialogue, turn_id, frame_id, start_idx, end_idx, old_value, new_value): + """ + update slot spans and slot_to_span + """ + frame = dialogue["turns"][turn_id]["frames"][frame_id] + offset = len(new_value) - len(old_value) + + for slot in frame['slots']: + if start_idx < slot['start']: + slot['start'] += offset + if start_idx < slot['exclusive_end']: + slot['exclusive_end'] += offset + + for k, vs in frame['slot_to_span'].items(): + for v, spans in vs.items(): + if start_idx < spans[0]: + spans[0] += offset + if start_idx < spans[1]: + spans[1] += offset + + +def update_values(dialogue, turn_id, frame_id, key, old_value, new_value): + """ + only update values: actions, state, slot_to_span + """ + frame = dialogue["turns"][turn_id]["frames"][frame_id] + if "actions" in frame: + for action in frame["actions"]: + if key == action["slot"] and old_value in action["values"]: + action["values"].remove(old_value) + action["values"].append(new_value) + if "state" in frame: + for k, vs in frame["state"]["slot_values"].items(): + for v_id, v in enumerate(vs): + if k == key and v == old_value: + vs[v_id] = new_value + + for k, vs in frame["slot_to_span"].items(): + for v, spans in list(vs.items()): + if k == key and v == old_value: + vs.pop(v) + vs[new_value] = spans + + +def get_sentence_components(turn): + """ + return list of start and end indices of slot values/ words that appear in utterance + """ + sentence = turn["utterance"] + word_indices = np.asarray([False for _ in range(len(sentence) + 1)]) + for frame in turn["frames"]: + if "state" in frame: + for k, vs in frame["state"]["slot_values"].items(): + for v in vs: + if v in sentence: + start_idx = sentence.index(v) + end_idx = start_idx + len(v) + word_indices[start_idx:end_idx] = True + if "actions" in frame: + for action in frame["actions"]: + k = action["slot"] + for v in action["values"]: + if v in sentence: + start_idx = sentence.index(v) + end_idx = start_idx + len(v) + word_indices[start_idx:end_idx] = True + + for i in range(len(sentence)): + if sentence[i].isalnum(): + word_indices[i] = True + res = [] + idx = 0 + while idx < len(word_indices): + if word_indices[idx]: + start_idx = idx + while word_indices[idx]: + idx += 1 + end_idx = idx + res.append((start_idx, end_idx)) + idx += 1 + return res + + +def find_word_in_turn(dialogue, turn_id, value, start_idx, end_idx): + """ + find non-cat slot value in turn. 
+ return List[(turn_id, frame_id, key)] + """ + assert isinstance(value, str) + frames = dialogue["turns"][turn_id]["frames"] + res = [] + for frame_id, frame in enumerate(frames): + for slot in frame["slots"]: + if start_idx == slot["start"] and end_idx == slot["exclusive_end"]: + res.append((turn_id, frame_id, slot["slot"])) + return res + + +def get_new_value(dialogue, turn_id, value, start_idx, end_idx): + """ + replace span with another value from ontology if this belongs non-cat slot + return new value + """ + candidates = find_word_in_turn(dialogue, turn_id, value, start_idx, end_idx) + possible_values = set() + for _, frame_id, k in candidates: + frame = dialogue["turns"][turn_id]["frames"][frame_id] + service = frame["service"] + if "possible_values" in ontology[(service, k)]: + possible_values.update(ontology[(service, k)]["possible_values"]) + return random.choice(list(possible_values)) if possible_values else None + + +def replace(dialogue, turn_id, start_idx, end_idx, new_value): + """ + replace utterance at turn_id around start_idx:end_idx with new_value. + If old value is found in turn (non-categorical slot), change all affected frames with new_value: + -update_values + -update_spans + """ + assert isinstance(turn_id, int) + assert isinstance(start_idx, int) + assert isinstance(end_idx, int) + turn = dialogue["turns"][turn_id] + sentence = turn["utterance"] + old_value = sentence[start_idx:end_idx] + affected_values = find_word_in_turn( + dialogue=dialogue, turn_id=turn_id, value=old_value, start_idx=start_idx, end_idx=end_idx + ) + affected_spans = [(turn_id, start_idx, end_idx)] + for _, frame_id, key in affected_values.copy(): + frame = dialogue["turns"][turn_id]["frames"][frame_id] + new_affected_values = frame["state_update"][key] + affected_values += new_affected_values + for a_turn_id, a_frame_id, a_key in new_affected_values: + assert key == a_key + spans = ( + dialogue["turns"][a_turn_id]["frames"][a_frame_id]["slot_to_span"].get(a_key, {}).get(old_value, None) + ) + if spans: + affected_spans += [(a_turn_id, spans[0], spans[1])] + + for a_turn_id, a_frame_id, a_key in affected_values: + update_values(dialogue, a_turn_id, a_frame_id, a_key, old_value, new_value) + for a_turn_id, start_idx, end_idx in affected_spans: + turn = dialogue["turns"][a_turn_id] + assert old_value == turn["utterance"][start_idx:end_idx] + for a_frame_id in range(len(turn["frames"])): + update_spans(dialogue, a_turn_id, a_frame_id, start_idx, end_idx, old_value, new_value) + turn["utterance"] = turn["utterance"][:start_idx] + new_value + turn["utterance"][end_idx:] + + +def num2str(dialogue, turn_id, old_value, start_idx, end_idx): + """ + gets old_value and returns stringified version if old_value was number and does not belong to non-cat span value + """ + res = find_word_in_turn(dialogue, turn_id, old_value, start_idx, end_idx) + if not res and old_value.isnumeric(): + return p.number_to_words(int(old_value)) + " " + old_value + return None + + +def test_helper(dialogue, dialogue_id, turn_id, start_idx, end_idx, new_value): + replace(dialogue, turn_id=turn_id, start_idx=start_idx, end_idx=end_idx, new_value=new_value) + for turn in dialogue["turns"]: + for frame in turn["frames"]: + if "state_update" in frame: + frame.pop("state_update") + + +def test(dialogues, dialogue_id, turn_id, old_value, new_value): + dialogue = copy.deepcopy(dialogues[dialogue_id]) + augment_dialog_by_auxiliary_entries(dialogue) + m = re.search(old_value, dialogue["turns"][turn_id]["utterance"]) + 
test_helper(dialogue, dialogue_id, turn_id, start_idx=m.start(), end_idx=m.end(), new_value=new_value) + pprint(dialogue) + validate(dialogue) + d_str_new = json.dumps(dialogue, sort_keys=True, indent=2) + d_str_old = json.dumps(dialogues[dialogue_id], sort_keys=True, indent=2) + print(d_str_new == d_str_old) + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--concat_orig_dialogue", action="store_true", help="contenate original dialogue to the augmented one" + ) + parser.add_argument( + "--input_dir", + type=str, + default="", + help="data directory. contains one schema.json and multiple dialogue*.json files", + ) + parser.add_argument("--output_dir", type=str, help="output data directory", default=None) + parser.add_argument("--num2string", action="store_true", help="convert digits to string") + parser.add_argument("--repeat", type=int, default=5, help="number of augmentation sweeps over input data") + parser.add_argument("--replace_turn_prob", type=float, default=1.0, help="likelihood to modify an utterance turn") + parser.add_argument( + "--replace_word_prob", type=float, default=1.0, help="likelihood to modify a word in an utterance" + ) + parser.add_argument("--seed", type=int, default=0) + args = parser.parse_args() + return args + + +if __name__ == "__main__": + + args = parse_args() + print(vars(args)) + random.seed(args.seed) + + if not os.path.exists(args.input_dir): + raise ValueError( + "SGD dataset not found. Dataset can be downloaded from https://github.com/google-research-datasets/dstc8-schema-guided-dialogue" + ) + + in_file_path = args.input_dir + schema_path = os.path.join(in_file_path, 'schema.json') + dialogue_files = [ + os.path.join(in_file_path, f) + for f in os.listdir(in_file_path) + if os.path.isfile(os.path.join(in_file_path, f)) + if "dialogue" in f + ] + dialogue_files.sort() + orig_dialog = [] + for d_file in dialogue_files: + orig_dialog.extend(json.load(open(d_file, 'r'))) + print(f"len(orig_dialog) = {len(orig_dialog)}") + orig_schema = json.load(open(schema_path, 'r')) + + dialogue_count = defaultdict(int) + final_dialogues = defaultdict(list) + + ontology = get_ontology(dialogues=orig_dialog, schemas=orig_schema) + + for dialogue_id, dialogue in tqdm(enumerate(orig_dialog)): + validate(dialogue) # for test purposes + augment_dialog_by_auxiliary_entries(dialogue) + validate(dialogue) # for test purposes + + if args.num2string: + if args.concat_orig_dialogue: + process_dialogues( + final_dialogues=final_dialogues, + dialogue_count=dialogue_count, + dialogues=orig_dialog, + replace_turn_prob=1.0, + replace_word_prob=1.0, + new_val_func=num2str, + ) + else: + process_dialogues( + final_dialogues=defaultdict(list), + dialogue_count=defaultdict(int), + dialogues=orig_dialog, + replace_turn_prob=1.0, + replace_word_prob=1.0, + new_val_func=num2str, + ) + + for _ in range(args.repeat): + dialogues = copy.deepcopy(orig_dialog) + process_dialogues( + final_dialogues=final_dialogues, + dialogue_count=dialogue_count, + dialogues=dialogues, + replace_turn_prob=args.replace_turn_prob, + replace_word_prob=args.replace_word_prob, + new_val_func=get_new_value, + ) + + if args.concat_orig_dialogue and not args.num2string: + for dialogue_id, dialogue in tqdm(enumerate(orig_dialog)): + d_id, d_count = dialogue["dialogue_id"].split("_") + d_id = int(d_id) + dialogue["dialogue_id"] = f"{d_id}_{dialogue_count[d_id]:05d}" + dialogue_count[d_id] += 1 + final_dialogues[d_id].append(dialogue) + + for dir_id, dialogues in 
final_dialogues.items(): + for dialogue in dialogues: + for turn in dialogue["turns"]: + for frame in turn["frames"]: + if 'state_update' in frame: + frame.pop("state_update") + if 'slot_to_span' in frame: + frame.pop("slot_to_span") + if args.output_dir is None: + output_dir = f"augmented_repeat{args.repeat}_replace_turn_prob{args.replace_turn_prob}_replace_word_prob{args.replace_word_prob}_concatorig{args.concat_orig_dialogue}_num2string{args.num2string}" + else: + output_dir = args.output_dir + os.makedirs(output_dir, exist_ok=True) + for dir_id, dialogues in final_dialogues.items(): + with open(os.path.join(output_dir, f"dialogues_{dir_id:03d}.json"), 'w') as outfile: + json.dump(dialogues, outfile, indent=2) + + with open(os.path.join(output_dir, f"schema.json"), 'w') as outfile: + json.dump(orig_schema, outfile, indent=2) diff --git a/tests/context.py b/examples/nlp/dialogue_state_tracking/data/multiwoz/__init__.py similarity index 79% rename from tests/context.py rename to examples/nlp/dialogue_state_tracking/data/multiwoz/__init__.py index 805c10a0e6c7..cd24d1f06b22 100644 --- a/tests/context.py +++ b/examples/nlp/dialogue_state_tracking/data/multiwoz/__init__.py @@ -1,7 +1,6 @@ -# ! /usr/bin/python -# -*- coding: utf-8 -*- - +# ============================================================================= # Copyright 2020 NVIDIA. All Rights Reserved. +# Copyright 2019 The Google Research Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -15,8 +14,3 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================= - -import nemo -import nemo.collections.asr as nemo_asr -import nemo.collections.nlp as nemo_nlp -import nemo.collections.tts as nemo_tts diff --git a/examples/nlp/dialogue_state_tracking/data/multiwoz/correct_categorical_state_values.tsv b/examples/nlp/dialogue_state_tracking/data/multiwoz/correct_categorical_state_values.tsv new file mode 100644 index 000000000000..0672290108ea --- /dev/null +++ b/examples/nlp/dialogue_state_tracking/data/multiwoz/correct_categorical_state_values.tsv @@ -0,0 +1,18 @@ +alpha-milton alpha milton +any dontcare +bed and breakfast guesthouse +boating boat +cam cambridge +concert concerthall +concert hall concerthall +guest house guesthouse +guesthouses guesthouse +moderate|cheap cheap|moderate +museum kettles yard museum +mutiple sports multiple sports +nightclub night club +acorn guesthouse acorn guest house +swimmingpool swimming pool +sports multiple sports +pool swimming pool +theater theatre \ No newline at end of file diff --git a/examples/nlp/dialogue_state_tracking/data/multiwoz/create_data_from_multiwoz.py b/examples/nlp/dialogue_state_tracking/data/multiwoz/create_data_from_multiwoz.py new file mode 100644 index 000000000000..bef1afe77eb7 --- /dev/null +++ b/examples/nlp/dialogue_state_tracking/data/multiwoz/create_data_from_multiwoz.py @@ -0,0 +1,793 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# Copyright 2019 The Google Research Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +"""Converts Multiwoz 2.1 dataset to the data format of SGD.""" +import argparse +import collections +import copy +import json +import os +import re + +import nemo.collections.nlp.data.datasets.sgd_dataset.schema as schema +from nemo import logging + +# Parsing arguments +parser = argparse.ArgumentParser(description='conversion of multiwoz into sgd') + +parser.add_argument('--input_data_dir', type=str, required=True, help='Path of the dataset to convert from.') +parser.add_argument( + '--output_dir', + type=str, + help='Path to output directory. If not specified, generate the dialogues in the same directory as the script.', +) +parser.add_argument( + '--annotate_copy_slots', + action='store_true', + help='Whether to annotate slots whose value is copied from a different slot in ' + 'the previous state. If true, add a new key "copy_from" in the slot ' + 'annotation dict. Its value is the slot that the value is copied from.', +) + +parser.add_argument('--schema_file_name', default='schema.json', type=str, help='Name of the schema file to use.') + +args = parser.parse_args() + +_PATH_MAPPING = [('test', 'testListFile.json'), ('dev', 'valListFile.json'), ('train', '')] + +_DIR_PATH = os.path.dirname(os.path.abspath(__file__)) +# File used for correcting categorical slot values. Each line is a pair of +# the original slot value in MultiWOZ 2.1 annotation and the corrected slot +# value. +_CORRECT_FOR_STATE_PATH = os.path.join(_DIR_PATH, 'correct_categorical_state_values.tsv') + +_DEFAULT_SERVICE_NAME = 'all' +# "Don't care" slot value. +_DONT_CARE = 'dontcare' +_NONE_VALUE = 'none' +_INACTIVE_INTENT = 'NONE' +# Maximum number of dialogues to write in each output file. +_NUM_DIALS_PER_FILE = 512 + +# We try to find the span of non-categorical slot values in the dialog history, +# but sometimes there is no exact match and we choose to find the closest values +# from the utterance. If the found value is contained in the list below, +# we need to check if it is a correct match. +_FOUND_VALUES_NEED_CHECK = [ + 'restaurant', + 'hotel', + 'museum', + 'church', + 'college', + 'cinema', + 'park', + 'guesthouses', + 'guesthouse', + 'great', + 'from', + 'hotels', + 'school', + 'schools', + 'guests', + 'colleges', + 'lodge', + 'theatre', + 'centre', + 'bar', + 'bed and breakfast', + 'train', + 'station', + 'gallery', + 'la', + 'time', + 'house', + 'guest house', + 'old', + 'pool', + 'house', + 'a', + 'b', + 'the', + 'cafe', + 'cambridge', + 'hospital', + 'restaurant\'s', +] + +# A collection of phrases that are semantically similar to the key value, which +# is a word. +_SIMILAR_WORDS = { + 'portuguese': ['portugese', 'portugeuese'], + '01:30': ['1 thirty p . 
m .'], + '16:30': ['after 16:00'], + 'anatolia': ['anatoilia'], + 'allenbell': ['allenball'], + 'caribbean': ['carribbean'], + 'seafood': ['sea food'], + 'moroccan': ['morrocan'], + 'avalon': ['avaion'], + 'barbeque': ['bbq'], + 'american': ['americas'], + 'italian': ['pizza place'], + 'indian': ['taj tandoori'], + 'british': ['english'], + 'cambride': ['cambridge'], + 'fenditton': ['fen ditton'], + 'cafe': ['caffe'], + 'gonvile': ['gonville'], + 'shaddia': ['shaddai'], +} + +# A collection of phrases that are semantically similar to the key value, which +# is a phrase consisted of more than one word. +_SIMILAR_PHRASES = { + 'alexander bed and breakfast': ['alexander b&b', 'alexander bed and breafast', 'alexander bed & breakfast'], + 'a and b guest house': ['a & b guest house', 'a and b guesthouse', 'a and be guest house'], + 'saint johns chop house': ['saint johns chop shop house'], + 'bridge guest house': ['bridge guesthouse'], + 'finches b and b': ['finches b & b', 'finches b&b'], + 'finches bed and breakfast': ['flinches bed and breakfast', 'finches b&b'], + 'carolina bed and breakfast': ['carolina b&b'], + 'city centre north b and b': ['city centre north b&b', 'city centre north b & b'], + 'lan hong house': ['ian hong house', 'ian hong'], + 'ugly duckling': ['ugly ducking'], + 'sri lankan': ['sri lanken'], + 'cambridge punter': ['cambridge punte'], + 'abc theatre': ['adc theatre'], +} + + +def _locate_boundary(phrase, text): + """Locate the span of the phrase using exact match.""" + + def _locate_token_boundary(pos, text): + """Get the start and end index of a token that covers a certain position.""" + if pos < 0: + raise ValueError('Pos {} should be a positive integer.'.format(pos)) + next_space = text.find(' ', pos) + left_boundary = text.rfind(' ', 0, pos) + 1 + right_boundary = next_space if next_space != -1 else len(text) + return left_boundary, right_boundary + + phrase = phrase.strip() + pos_in_text = text.find(phrase) + if pos_in_text == -1: + return None, None + + tokens = phrase.split() + start_idx, _ = _locate_token_boundary(pos_in_text, text) + last_token = tokens[-1] + find_last_token = text.find(last_token, pos_in_text + len(phrase) - len(last_token)) + if find_last_token == -1: + raise ValueError('Should find the last word for value {}'.format(phrase)) + _, end_idx = _locate_token_boundary(find_last_token, text) + # If it's a number, the value should be exactly the same. + if phrase.isdigit() and text[start_idx:end_idx] != phrase: + return None, None + # If the phrase is short, the value should be exactly the same. + # e.g. we don't want to match "theatre" when searching for "the" + if len(phrase) <= 3 and len(phrase) != (end_idx - start_idx): + return None, None + return start_idx, end_idx + + +def _locate_word(word, text, start_pos): + """Get start and end index of a phrase that semantically equals to a word.""" + # If the word to search for contains 3 or 4 digits, correct it into time. + obj = re.match(r'(? 12: + times_to_try.append(':'.join([str(hour - 12), obj.group(2)])) + if minute == 0: + times_to_try.append(str(hour - 12) + ' pm') + times_to_try.append(str(hour - 12) + 'pm') + times_to_try.append(str(hour - 12) + ' p . m .') + times_to_try.append(str(hour - 12) + ' o\'clock p . m .') + times_to_try.append(str(hour - 12) + ' o\'clock') + times_to_try.append(str(hour) + ' o\'clock') + times_to_try.append(str(hour - 12) + ':00') + times_to_try.append(str(hour)) + elif hour == 12 and minute == 0: + times_to_try.extend(['12 pm', '12pm', '12 o\'clock', '12 p . 
m .', '12', 'noon']) + else: + times_to_try.append(':'.join([str(hour + 12), obj.group(2)])) + if int(minute) == 0: + times_to_try.append(str(hour) + ' am') + times_to_try.append(str(hour) + 'am') + times_to_try.append(str(hour) + ' a . m .') + times_to_try.append(str(hour) + ' o\'clock a . m .') + times_to_try.append(str(hour) + ' o\'clock') + times_to_try.append(str(hour + 12) + ':00') + times_to_try.append(str(hour)) + if minute == 15 or minute == 45 or minute == 30: + times_to_try.append('after ' + str(hour) + ':' + str(minute - 15)) + if hour < 10: + times_to_try.append('after 0' + str(hour) + ':' + str(minute - 15)) + if minute == 0: + times_to_try.append('after ' + str(hour - 1) + ':45') + for time_value in times_to_try: + # Correct time like "08:15" to "8:15" to increase match possibility. + if time_value[0] == '0': + if len(time_value) > 2 and time_value[1] != [':']: + time_value = time_value[1:] + else: + start_idx, end_idx = _locate_boundary(word, text) + if start_idx is not None: + return start_idx + start_pos, end_idx + start_pos + # Try phrases that is similar to the word to find. + for similar_word in _SIMILAR_WORDS.get(word, []): + start_idx, end_idx = _locate_boundary(similar_word, text) + if start_idx is not None: + return start_idx + start_pos, end_idx + start_pos + + # Slot values ended with 's' can be written in different formats. + # e.g. rosas can be written as rosa, rosa's. + if word.endswith('s') and len(word) > 3: + modified_words = [word[:-1] + '\'s', word[:-1]] + for modified_word in modified_words: + start_idx, end_idx = _locate_boundary(modified_word, text) + if start_idx is not None: + return start_idx + start_pos, end_idx + start_pos + return None, None + + +def exists_in_prev_dialog_states(slot_value, converted_turns): + """Whether slot value exists in the previous dialogue states.""" + for user_turn in converted_turns[::2]: + assert user_turn['speaker'] == 'USER' + for frame in user_turn['frames']: + if 'state' in frame and 'slot_values' in frame['state']: + slot_values_dict = frame['state']['slot_values'] + for slot, values_list in slot_values_dict.items(): + new_list = [] + for value in values_list: + new_list.extend(value.split('|')) + if slot_value in new_list: + return frame['service'], slot, values_list + return None, None, None + + +class Processor(object): + """A processor to convert Multiwoz to the data format used in SGD.""" + + def __init__(self, schemas): + self._schemas = schemas + # For statistically evaluating the modifications. + # Number of non-categorical slot values in dialogue state, which needs span + # annotations. + self._slot_spans_num = 0 + # Dict to track the number of non-categorical slot values whose span can not + # be found. + self._unfound_slot_spans_num = collections.Counter() + + # Dict used to correct categorical slot values annotated in MultiWOZ 2.1. + self._slot_value_correction_for_cat_slots = {} + with open(_CORRECT_FOR_STATE_PATH, 'r') as f: + for line in f: + tok_from, tok_to = line.replace('\n', '').split('\t') + self._slot_value_correction_for_cat_slots[tok_from] = tok_to + + @property + def unfound_slot_span_ratio(self): + """Get the ratio of the slot spans that can't be found in the utterances.""" + ratio_dict = {k: float(v) / float(self._slot_spans_num) for k, v in self._unfound_slot_spans_num.items()} + ratio_dict['total'] = float(sum(self._unfound_slot_spans_num.values())) / float(self._slot_spans_num) + return ratio_dict + + def _basic_text_process(self, text, lower=True): + # Remove redundant spaces. 
+ text = re.sub(r'\s+', ' ', text).strip() + if lower: + text = text.lower() + return text + + def _insert_slots_annotations_to_turn(self, turn, slots_annotations_list, service_name): + """Insert slot span annotations to a turn.""" + found_service = False + for frame in turn['frames']: + if frame['service'] == service_name: + frame['slots'].extend(slots_annotations_list) + found_service = True + continue + if not found_service: + turn['frames'].append({'service': service_name, 'slots': slots_annotations_list, 'actions': []}) + return + + def _correct_state_value_for_noncat(self, slot, val): + """Correct slot values for non-categorical slots.""" + val = val.strip() + if ( + (val == 'cam' and slot == 'restaurant-name') + or (val == 'friday' and slot == 'train-leaveat') + or (val == 'bed' and slot == 'attraction-name') + ): + return '' + if val == 'portugese': + val = 'portuguese' + return val + + def _correct_state_value_for_cat(self, _, val): + """Correct slot values for categorical slots.""" + val = val.strip() + return self._slot_value_correction_for_cat_slots.get(val, val) + + def _get_intent_from_actions(self, state_value_dict, sys_actions, user_actions): + """Generate user intent by rules. + + We assume each service has only one active intent which equals to the domain + mentioned in the current user turn. + We use _infer_domains_from_actions to infer the list of possible domains. + Domains that appear in the user actions and dialogue updates are prioritised + over domains mentioned in the previous system actions. + In the provided schema of MultiWOZ 2.1, every service contains one domain, + so the active_intent is either "NONE" or "find_{domain}" for every service. + + Args: + state_value_dict: a dict, key is the slot name, value is a list. + sys_actions: a list of sys actions in the next turn. + user_actions: a list of user actions. + + Returns: + String, intent of the current user turn. + """ + + def _infer_domains_from_actions(state_value_dict, sys_actions, user_actions): + """Infer the domains involved in the current turn from actions.""" + user_mentioned_domains = set() + for user_action in user_actions: + domain = user_action['act'].lower().split('-')[0] + if domain not in ['general', 'booking']: + user_mentioned_domains.add(domain) + sys_mentioned_domains = set() + for sys_action in sys_actions: + domain = sys_action['act'].lower().split('-')[0] + if domain not in ['general', 'booking']: + sys_mentioned_domains.add(domain) + # Compute domains whose slot values get updated in the current turn. + state_change_domains = set() + for slot, _ in state_value_dict.items(): + domain_name = slot.split('-')[0] + state_change_domains.add(domain_name) + # Infer the possible domains involved in the current turn for a certain + # service. + return list(user_mentioned_domains.union(state_change_domains)) or list(sys_mentioned_domains) + + domains = _infer_domains_from_actions(state_value_dict, sys_actions, user_actions) + return 'find_' + domains[0] if domains else _INACTIVE_INTENT + + def _is_filled(self, slot_value): + """Whether a slot value is filled.""" + slot_value = slot_value.lower() + return slot_value and slot_value != 'not mentioned' and slot_value != 'none' + + def _new_service_name(self, domain): + """Get the new service_name decided by the new schema.""" + # If the schema file only contains one service, we summarize all the slots + # into one service, otherwise, keep the domain name as the service name. 
+ return _DEFAULT_SERVICE_NAME if (len(self._schemas.services) == 1) else domain + + def _get_slot_name(self, slot_name, service_name, in_book_field=False): + """Get the slot name that is consistent with the schema file.""" + slot_name = 'book' + slot_name if in_book_field else slot_name + return '-'.join([service_name, slot_name]).lower() + + def _generate_dialog_states(self, frame_dict, overwrite_slot_values): + """Get the dialog states and overwrite some of the slot values.""" + dialog_states = collections.defaultdict(dict) + orig_dialog_states = collections.defaultdict(dict) + for domain_name, values in frame_dict.items(): + dialog_states_of_one_domain = {} + for k, v in values['book'].items(): + if isinstance(v, list): + for item_dict in v: + new_states = { + self._get_slot_name(slot_name, domain_name, in_book_field=True): slot_val + for slot_name, slot_val in item_dict.items() + } + dialog_states_of_one_domain.update(new_states) + if isinstance(v, str) and v: + slot_name = self._get_slot_name(k, domain_name, in_book_field=True) + dialog_states_of_one_domain[slot_name] = v + new_states = { + self._get_slot_name(slot_name, domain_name): slot_val for slot_name, slot_val in values['semi'].items() + } + dialog_states_of_one_domain.update(new_states) + # Get the new service_name that is decided by the schema. If the + # schema file only contains one service, we summarize all the slots into + # one service, otherwise, keep the domain name as the service name. + new_service_name = self._new_service_name(domain_name) + # Record the orig state values without any change. + orig_dialog_state_of_one_domain = copy.deepcopy(dialog_states_of_one_domain) + for (key, value) in orig_dialog_state_of_one_domain.items(): + if key in self._schemas.get_service_schema(new_service_name).slots and self._is_filled(value): + orig_dialog_states[new_service_name][key] = value + # Correct the slot values in the dialogue state. + corrected_dialog_states_of_one_domain = {} + for k, v in dialog_states_of_one_domain.items(): + if k in self._schemas.get_service_schema(new_service_name).categorical_slots: + corrected_dialog_states_of_one_domain[k] = self._correct_state_value_for_cat( + k, self._basic_text_process(v) + ) + else: + corrected_dialog_states_of_one_domain[k] = self._correct_state_value_for_noncat( + k, self._basic_text_process(v) + ) + dialog_states_of_one_domain = { + k: v for k, v in corrected_dialog_states_of_one_domain.items() if self._is_filled(v) + } + + # Overwrite some of the slot values and changes the slot value of a slot + # into a list. + for slot, value in dialog_states_of_one_domain.items(): + dialog_states_of_one_domain[slot] = [value] + if slot in overwrite_slot_values[new_service_name]: + if value in overwrite_slot_values[new_service_name][slot]: + dialog_states_of_one_domain[slot] = sorted( + overwrite_slot_values[new_service_name][slot][value] + ) + # Only track the slot values that are listed in the schema file. Slots + # such as reference number, phone number are filtered out. 
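# Editor's illustration (values assumed, not taken from the patch): after this
# filtering, dialog_states is a nested dict of value lists, roughly
#     {'hotel': {'hotel-area': ['north'], 'hotel-bookday': ['monday']}}
# and slots missing from schema.json (reference or phone numbers) are dropped.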
+ for (key, value) in dialog_states_of_one_domain.items(): + if key in self._schemas.get_service_schema(new_service_name).slots: + dialog_states[new_service_name][key] = value + return dialog_states, orig_dialog_states + + def _get_update_states(self, prev_ds, cur_ds): + """Get the updated dialogue states between two user turns.""" + updates = collections.defaultdict(dict) + for service, slot_values_dict in cur_ds.items(): + if service not in prev_ds: + updates[service] = slot_values_dict + continue + for slot, values in slot_values_dict.items(): + for value in values: + if slot not in prev_ds[service] or value not in prev_ds[service][slot]: + updates[service][slot] = updates[service].get(slot, []) + [value] + return updates + + def _generate_slot_annotation(self, orig_utt, slot, slot_value): + """Generate the slot span of a slot value from the utterance. + + Args: + orig_utt: Original utterance in string. + slot: Slot name in string. + slot_value: Slot value to be annotated in string. + + Returns: + slot_ann: A dict that denotes the slot name and slot spans. + slot_value: The corrected slot value based on the utterance. It's + unchanged if the slot value can't be found in the utterance. + """ + slot_ann = [] + utt = orig_utt.lower() + start_idx, end_idx = None, None + # Check if the utterance mentions any phrases that are semantically same as + # the slot value. + for alias_slot_value in [slot_value] + _SIMILAR_PHRASES.get(slot_value, []): + start_idx, end_idx = _locate_boundary(alias_slot_value, utt) + if start_idx is not None: + break + if start_idx is None: + # Tokenize the slot value and find each of them. + splitted_slot_values = slot_value.strip().split() + unfound_tokens_idx = [] + search_start_idx = 0 + # Find if each token exists in the utterance. + for i, value_tok in enumerate(splitted_slot_values): + tok_start_idx, tok_end_idx = _locate_word(value_tok, utt, search_start_idx) + if tok_start_idx is not None and tok_end_idx is not None: + # Hard coded rules + # if the value to find is one of ['and', 'of', 'by'] and + # there's no token prior to them having been found, we don't think + # the value as found since they are fairly common words. + if value_tok in ['and', 'of', 'by'] and start_idx is None: + unfound_tokens_idx.append(i) + continue + if start_idx is None: + start_idx = tok_start_idx + search_start_idx = tok_end_idx + else: + unfound_tokens_idx.append(i) + # Record the last index. + if search_start_idx > 0: + end_idx = search_start_idx + if start_idx is None: + return [], slot_value + new_slot_value = utt[start_idx:end_idx] + + if abs(len(slot_value) - len(new_slot_value)) > 20: + return [], slot_value + if len(new_slot_value.split()) > (len(slot_value.strip().split()) + 2) and ( + new_slot_value not in _SIMILAR_PHRASES.get(slot_value, []) + ): + return [], slot_value + # If the value found from the utterance is one of values below and the real + # slot value contains more than one tokens, we don't think it as a + # successful match. + if new_slot_value.strip() in _FOUND_VALUES_NEED_CHECK and len(slot_value.split()) > 1: + return [], slot_value + # If the value based on the utterance ends with any value below, we don't + # annotate span of it. 
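# (Editor's example, hypothetical: a partial match such as 'alexander bed and' for
#  the value 'alexander bed and breakfast' would be rejected by the check below.)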
+ if new_slot_value.strip().split()[-1] in ['and', 'the', 'of', 'by']: + return [], slot_value + slot_ann.append( + {'slot': slot, 'value': orig_utt[start_idx:end_idx], 'exclusive_end': end_idx, 'start': start_idx,} + ) + return slot_ann, new_slot_value + + def _update_corrected_slot_values( + self, corrected_slot_values_dict, service_name, slot, slot_value, new_slot_value + ): + """Update the dict that keeps track of the modified state values.""" + if slot not in corrected_slot_values_dict[service_name]: + corrected_slot_values_dict[service_name][slot] = collections.defaultdict(set) + corrected_slot_values_dict[service_name][slot][slot_value] = {slot_value} + corrected_slot_values_dict[service_name][slot][slot_value].add(new_slot_value) + return + + def _get_requested_slots_from_action(self, act_list): + """Get user's requested slots from the action.""" + act_request = [] + for act_dict in act_list: + if 'request' in act_dict['act'].lower(): + slot_name = act_dict['slot'] + if slot_name == 'Arrive': + slot_name = 'arriveby' + elif slot_name == 'Leave': + slot_name = 'leaveat' + act_request.append('-'.join([act_dict['act'].split('-')[0], slot_name]).lower()) + return act_request + + def _generate_actions(self, dialog_act): + """Generate user/system actions.""" + converted_actions = collections.defaultdict(list) + for k, pair_list in dialog_act.items(): + k_list = k.lower().strip().split('-') + domain = k_list[0] + service_name = self._new_service_name(domain) + act_slot_values_dict = collections.defaultdict(list) + for pair in pair_list: + slot = pair[0] + slot_value = pair[1] + if slot != _NONE_VALUE: + act_slot_values_dict[slot].append(slot_value) + if not act_slot_values_dict: + converted_actions[service_name].append({'act': k}) + for slot, values in act_slot_values_dict.items(): + converted_actions[service_name].append({'act': k, 'slot': slot, 'values': values}) + return converted_actions + + def _generate_dial_turns(self, turns, dial_id): + """Generate the dialog turns and the services mentioned in the dialogue.""" + prev_dialog_states = collections.defaultdict(dict) + corrected_slot_values = collections.defaultdict(dict) + converted_turns = [] + appear_services = set() + if len(turns) % 2 != 0: + raise ValueError('dialog ended by user') + for i in range(len(turns))[::2]: + user_info = turns[i] + sys_info = turns[i + 1] + user_utt = self._basic_text_process(user_info['text'], False) + sys_utt = self._basic_text_process(sys_info['text'], False) + user_actions = collections.defaultdict(list) + sys_actions = collections.defaultdict(list) + if 'dialog_act' in user_info: + user_actions = self._generate_actions(user_info['dialog_act']) + if 'dialog_act' in sys_info: + sys_actions = self._generate_actions(sys_info['dialog_act']) + + sys_turn = {'utterance': sys_utt, 'speaker': 'SYSTEM', 'frames': [], 'turn_id': str(i + 1)} + user_turn = {'utterance': user_utt, 'speaker': 'USER', 'frames': [], 'turn_id': str(i)} + dialog_states, _ = self._generate_dialog_states(sys_info['metadata'], corrected_slot_values) + appear_services.update(dialog_states.keys()) + + # Fill in slot spans in the user turn and the previous system turn for + # the non categorical slots. 
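# Editor's note (offsets illustrative, not part of the patch): spans for updated
# non-categorical slots are looked up in the current user utterance and in the
# previous system utterance, with the user-side match taking priority; a successful
# match produces an annotation shaped like
#     {'slot': 'hotel-name', 'value': 'acorn guest house', 'start': 10, 'exclusive_end': 27}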
+ user_slots = collections.defaultdict(list) + sys_slots = collections.defaultdict(list) + update_states = self._get_update_states(prev_dialog_states, dialog_states) + prev_sys_utt = converted_turns[-1]['utterance'] if converted_turns else '' + for service_name, slot_values_dict in update_states.items(): + new_service_name = self._new_service_name(service_name) + service_schema = self._schemas.get_service_schema(new_service_name) + for slot, slot_value in slot_values_dict.items(): + assert slot_value, 'slot values shouls not be empty' + slot_value = slot_value[0] + if slot in service_schema.categorical_slots: + if slot_value not in service_schema.get_categorical_slot_values(slot) and slot_value not in [ + _DONT_CARE + ]: + logging.error('Value %s not contained in slot %s, dial_id %s, ', slot_value, slot, dial_id) + dialog_states[service_name][slot] = [slot_value] + else: + self._slot_spans_num += 1 + if slot_value == _DONT_CARE: + continue + user_slot_ann, slot_value_from_user = self._generate_slot_annotation( + user_utt, slot, slot_value + ) + sys_slot_ann, slot_value_from_sys = self._generate_slot_annotation( + prev_sys_utt, slot, slot_value + ) + # Values from user utterance has a higher priority than values from + # sys utterance. We correct the slot value of non-categorical slot + # first based on user utterance, then system utterance. + if user_slot_ann and slot_value_from_user != slot_value: + if sys_slot_ann and (slot_value_from_sys == slot_value): + user_slot_ann = None + else: + self._update_corrected_slot_values( + corrected_slot_values, service_name, slot, slot_value, slot_value_from_user + ) + dialog_states[service_name][slot] = list( + corrected_slot_values[service_name][slot][slot_value] + ) + if not user_slot_ann and sys_slot_ann and slot_value_from_sys != slot_value: + self._update_corrected_slot_values( + corrected_slot_values, service_name, slot, slot_value, slot_value_from_sys + ) + dialog_states[service_name][slot] = list( + corrected_slot_values[service_name][slot][slot_value] + ) + if user_slot_ann: + user_slots[service_name].extend(user_slot_ann) + if sys_slot_ann: + sys_slots[service_name].extend(sys_slot_ann) + if not user_slot_ann and not sys_slot_ann: + # First check if it exists in the previous dialogue states. + from_service_name, from_slot, from_slot_values = exists_in_prev_dialog_states( + slot_value, converted_turns + ) + if from_service_name is not None: + self._unfound_slot_spans_num['copy_from_prev_dialog_state'] += 1 + if args.annotate_copy_slots: + user_slots[service_name].append( + {'slot': slot, 'copy_from': from_slot, 'value': from_slot_values} + ) + continue + # Second, trace back the dialogue history to find the span. + for prev_turn in converted_turns[-2::-1]: + prev_utt = prev_turn['utterance'] + prev_slot_ann, prev_slot_value = self._generate_slot_annotation( + prev_utt, slot, slot_value + ) + if prev_slot_ann: + if prev_slot_value != slot_value: + self._update_corrected_slot_values( + corrected_slot_values, service_name, slot, slot_value, prev_slot_value + ) + dialog_states[service_name][slot] = list( + corrected_slot_values[service_name][slot][slot_value] + ) + self._insert_slots_annotations_to_turn(prev_turn, prev_slot_ann, service_name) + break + self._unfound_slot_spans_num[slot] += 1 + continue + # Fill in slot annotations for the system turn. 
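# Editor's sketch (assumed behaviour, not asserted by the patch): the system-side
# annotations collected above are attached to the matching service frame of the
# previous system turn; if no such frame exists yet, a new one is appended, e.g.
#     {'service': 'hotel', 'slots': [<span annotations>], 'actions': []}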
+ for service_name in sys_slots: + if not sys_slots[service_name]: + continue + self._insert_slots_annotations_to_turn(converted_turns[-1], sys_slots[service_name], service_name) + # Generate user frames from dialog_states. + latest_update_states = self._get_update_states(prev_dialog_states, dialog_states) + for service_name, slot_values_dict in dialog_states.items(): + user_intent = self._get_intent_from_actions( + latest_update_states[service_name], sys_actions[service_name], user_actions[service_name] + ) + # Fill in values. + user_turn['frames'].append( + { + 'slots': user_slots[service_name], + 'state': { + 'slot_values': {k: v for k, v in slot_values_dict.items() if v}, + 'requested_slots': self._get_requested_slots_from_action(user_actions[service_name]), + 'active_intent': user_intent, + }, + 'service': service_name, + } + ) + non_active_services = set(self._schemas.services) - appear_services + for service_name in non_active_services: + user_intent = self._get_intent_from_actions({}, sys_actions[service_name], user_actions[service_name]) + user_turn['frames'].append( + { + 'service': service_name, + 'slots': [], + 'state': { + 'active_intent': user_intent, + 'requested_slots': self._get_requested_slots_from_action(user_actions[service_name]), + 'slot_values': {}, + }, + } + ) + converted_turns.extend([user_turn, sys_turn]) + prev_dialog_states = dialog_states + return converted_turns, list(appear_services) + + def convert_to_dstc(self, id_list, dialogs): + """Generate a list of dialogues in the dstc8/SGD data format.""" + converted_dialogs = [] + for dial_id in id_list: + converted_turns, covered_services = self._generate_dial_turns(dialogs[dial_id]['log'], dial_id) + dialog = {'dialogue_id': dial_id, 'services': covered_services, 'turns': converted_turns} + converted_dialogs.append(dialog) + return converted_dialogs + + +def change_to_nemo_id(dialogs_list, file_index): + for i, dialogue in enumerate(dialogs_list): + dialogue['dialogue_id'] = f'{file_index}_{i:05d}' + return dialogs_list + + +def main(): + schema_path = os.path.join(_DIR_PATH, args.schema_file_name) + schemas = schema.Schema(schema_path) + processor = Processor(schemas) + data_path = os.path.join(args.input_data_dir, 'data.json') + with open(data_path, 'r') as f: + data = json.load(f) + dev_test_ids = [] + output_dir = args.output_dir or _DIR_PATH + # Generate dev and test set according to the ids listed in the files. Ids not + # included in the dev and test id list files belong to the training set. + for output_dir_name, file_name in _PATH_MAPPING: + output_sub_dir = os.path.join(output_dir, output_dir_name) + if not os.path.exists(output_sub_dir): + os.makedirs(output_sub_dir) + schema_path = os.path.join(output_sub_dir, 'schema.json') + schemas.save_to_file(schema_path) + dial_ids = [] + if file_name: + id_list_path = os.path.join(args.input_data_dir, file_name) + with open(id_list_path) as f: + dial_ids = [id_name.strip() for id_name in f.readlines()] + dev_test_ids.extend(dial_ids) + else: + # Generate the ids for the training set. + dial_ids = list(set(data.keys()) - set(dev_test_ids)) + converted_dials = processor.convert_to_dstc(dial_ids, data) + logging.info('Unfound slot span ratio %s', processor.unfound_slot_span_ratio) + logging.info('Writing %d dialogs to %s', len(converted_dials), output_sub_dir) + for i in range(0, len(converted_dials), _NUM_DIALS_PER_FILE): + file_index = int(i / _NUM_DIALS_PER_FILE) + 1 + # Create a new json file and save the dialogues. 
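# Editor's illustration (subdirectory names assumed from _PATH_MAPPING, which is
# defined earlier in this file): the output mirrors the SGD release layout, e.g.
#     <output_dir>/train/schema.json
#     <output_dir>/train/dialogues_001.json   # dialogue_id values like '1_00000'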
+ json_file_path = os.path.join(output_sub_dir, 'dialogues_{:03d}.json'.format(file_index)) + dialogs_list = converted_dials[(file_index - 1) * _NUM_DIALS_PER_FILE : file_index * _NUM_DIALS_PER_FILE] + dialogs_list = change_to_nemo_id(dialogs_list, file_index) + with open(json_file_path, 'w') as f: + json.dump(dialogs_list, f, indent=2, separators=(',', ': '), sort_keys=True) + logging.info('Created %s with %d dialogues.', json_file_path, len(dialogs_list)) + + +if __name__ == '__main__': + main() diff --git a/examples/nlp/dialogue_state_tracking/data/multiwoz/schema.json b/examples/nlp/dialogue_state_tracking/data/multiwoz/schema.json new file mode 100644 index 000000000000..c130b0fd818b --- /dev/null +++ b/examples/nlp/dialogue_state_tracking/data/multiwoz/schema.json @@ -0,0 +1,636 @@ +[ + { + "service_name": "hotel", + "slots": [ + { + "name": "hotel-pricerange", + "description": "the price range of the hotel", + "possible_values": [ + "$100", + "cheap", + "cheap>moderate", + "cheap|moderate", + "expensive", + "moderate" + ], + "is_categorical": true + }, + { + "name": "hotel-type", + "description": "the type of the hotel", + "possible_values": [ + "guesthouse", + "hotel", + "hotel|guesthouse" + ], + "is_categorical": true + }, + { + "name": "hotel-parking", + "description": "does the hotel have free parking", + "possible_values": [ + "free", + "no", + "yes" + ], + "is_categorical": true + }, + { + "name": "hotel-bookday", + "description": "the day of hotel booking", + "possible_values": [ + "friday", + "friday>tuesday", + "monday", + "mondaymonday", + "thursday", + "tuesday", + "wednesday", + "wednesday|friday" + ], + "is_categorical": true + }, + { + "name": "hotel-bookpeople", + "description": "number of people to book the hotel for", + "possible_values": [ + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8" + ], + "is_categorical": true + }, + { + "name": "hotel-bookstay", + "description": "the duration of stay or booking", + "possible_values": [ + "1", + "2", + "3", + "3|1", + "4", + "5", + "5|4", + "6", + "7", + "8" + ], + "is_categorical": true + }, + { + "name": "hotel-stars", + "description": "the rating of the hotel", + "possible_values": [ + "0", + "1", + "2", + "3", + "3|4", + "4", + "4|5", + "5" + ], + "is_categorical": true + }, + { + "name": "hotel-internet", + "description": "does it have internet or wifi", + "possible_values": [ + "free", + "no", + "yes" + ], + "is_categorical": true + }, + { + "name": "hotel-name", + "description": "the name of the hotel", + "possible_values": [], + "is_categorical": false + }, + { + "name": "hotel-area", + "description": "the locality of the hotel", + "possible_values": [ + "centre", + "east", + "north", + "south", + "west", + "west|centre" + ], + "is_categorical": true + } + ], + "description": "hotel reservations and vacation stays", + "intents": [ + { + "name": "find_hotel", + "description": "search for a hotel to stay in", + "is_transactional": false, + "required_slots": [], + "optional_slots": { + "hotel-pricerange": "dontcare", + "hotel-type": "dontcare", + "hotel-parking": "dontcare", + "hotel-bookday": "dontcare", + "hotel-bookpeople": "dontcare", + "hotel-bookstay": "dontcare", + "hotel-stars": "dontcare", + "hotel-internet": "dontcare", + "hotel-name": "dontcare", + "hotel-area": "dontcare" + } + } + ] + }, + { + "service_name": "train", + "slots": [ + { + "name": "train-destination", + "description": "the city you want to go to", + "possible_values": [ + "birmingham new street", + "bishops stortford", + "bournemouth", + 
"broxbourne", + "cambridge", + "centre", + "city centre north", + "copper kettle", + "curry prince", + "ely", + "glastonbury", + "gourmet burger kitchen", + "huntingdon marriott hotel", + "huntington marriott", + "kings lynn", + "leicester", + "liverpool", + "liverpool street", + "london", + "london kings cross", + "london liverpool street", + "norway", + "norwich", + "peterborough", + "stansted airport", + "stevenage" + ], + "is_categorical": true + }, + { + "name": "train-arriveby", + "description": "when should the train reach your destination", + "possible_values": [], + "is_categorical": false + }, + { + "name": "train-departure", + "description": "the location where you want to catch the train from", + "possible_values": [ + "alpha milton", + "aylesbray lodge guest", + "birmingham new street", + "bishops stortford", + "brookshite", + "broxbourne", + "cafe uno", + "camboats", + "cambridge", + "cineworld", + "city hall", + "duxford", + "east london", + "ely", + "hamilton lodge", + "huntingdon", + "kings lynn", + "leicester", + "liverpool", + "london", + "london kings cross", + "london liverpool", + "london liverpool street", + "norwich", + "panahar", + "peterborough", + "stansted airport", + "stevenage", + "stratford", + "wandlebury country park" + ], + "is_categorical": true + }, + { + "name": "train-day", + "description": "the day of the journey", + "possible_values": [ + "friday", + "monday", + "saturday", + "sunday", + "thursday", + "tuesday", + "wednesday" + ], + "is_categorical": true + }, + { + "name": "train-bookpeople", + "description": "number of tickets to buy", + "possible_values": [ + "1", + "10", + "15", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9" + ], + "is_categorical": true + }, + { + "name": "train-leaveat", + "description": "the departure time of the train", + "possible_values": [], + "is_categorical": false + } + ], + "description": "find trains that take you to places", + "intents": [ + { + "name": "find_train", + "description": "search for trains that take you places", + "is_transactional": false, + "required_slots": [], + "optional_slots": { + "train-destination": "dontcare", + "train-arriveby": "dontcare", + "train-departure": "dontcare", + "train-day": "dontcare", + "train-bookpeople": "dontcare", + "train-leaveat": "dontcare" + } + } + ] + }, + { + "service_name": "attraction", + "slots": [ + { + "name": "attraction-area", + "description": "the place where you are located", + "possible_values": [ + "centre", + "centre|west", + "east", + "north", + "south", + "west" + ], + "is_categorical": true + }, + { + "name": "attraction-name", + "description": "the name of the site you want to visit", + "possible_values": [], + "is_categorical": false + }, + { + "name": "attraction-type", + "description": "the type of attractions you are interested in", + "possible_values": [ + "architecture", + "boat", + "boating", + "camboats", + "church", + "churchills college", + "cinema", + "college", + "concert", + "concerthall", + "concerthall|boat", + "entertainment", + "entertainment|cinemas|museums|theatres", + "gallery", + "gastropub", + "hiking|historical", + "hotel", + "multiple sports", + "multiple sports|theatre", + "museum", + "museum kettles yard", + "museum|nightclub", + "night club", + "outdoor", + "park", + "park|boat", + "pool", + "special", + "sports", + "swimming pool", + "theater", + "theatre" + ], + "is_categorical": true + } + ], + "description": "find touristy stuff to do around you", + "intents": [ + { + "name": "find_attraction", + "description": 
"search for places to see for leisure", + "is_transactional": false, + "required_slots": [], + "optional_slots": { + "attraction-area": "dontcare", + "attraction-name": "dontcare", + "attraction-type": "dontcare" + } + } + ] + }, + { + "service_name": "restaurant", + "slots": [ + { + "name": "restaurant-pricerange", + "description": "indicates how expensive or cheap the restaurant is", + "possible_values": [ + "cheap", + "cheap|moderate", + "expensive", + "moderate" + ], + "is_categorical": true + }, + { + "name": "restaurant-area", + "description": "the locality of the restaurant", + "possible_values": [ + "centre", + "east", + "east|south", + "north", + "south", + "west" + ], + "is_categorical": true + }, + { + "name": "restaurant-food", + "description": "the cuisine or type of food served", + "possible_values": [], + "is_categorical": false + }, + { + "name": "restaurant-name", + "description": "the name of the restaurant", + "possible_values": [], + "is_categorical": false + }, + { + "name": "restaurant-bookday", + "description": "the day of booking at the restaurant", + "possible_values": [ + "friday", + "monday", + "saturday", + "saturday|thursday", + "sunday", + "sunday|thursday", + "thursday", + "tuesday", + "wednesday" + ], + "is_categorical": true + }, + { + "name": "restaurant-bookpeople", + "description": "number of people to reserve the restaurant for", + "possible_values": [ + "1", + "2", + "3", + "4", + "4|7", + "5", + "6", + "7", + "8" + ], + "is_categorical": true + }, + { + "name": "restaurant-booktime", + "description": "the time of the reservation at the restaurant", + "possible_values": [], + "is_categorical": false + } + ], + "description": "find places to dine and whet your appetite", + "intents": [ + { + "name": "find_restaurant", + "description": "search for places to wine and dine", + "is_transactional": false, + "required_slots": [], + "optional_slots": { + "restaurant-pricerange": "dontcare", + "restaurant-area": "dontcare", + "restaurant-food": "dontcare", + "restaurant-name": "dontcare", + "restaurant-bookday": "dontcare", + "restaurant-bookpeople": "dontcare", + "restaurant-booktime": "dontcare" + } + } + ] + }, + { + "service_name": "hospital", + "slots": [ + { + "name": "hospital-department", + "description": "the kind of ailment or sickness you want treated", + "possible_values": [ + "acute medical assessment unit", + "acute medicine for the elderly", + "antenatal", + "cambridge eye unit", + "cardiology", + "cardiology and coronary care unit", + "childrens oncology and haematology", + "childrens surgical and medicine", + "clinical decisions unit", + "clinical research facility", + "coronary care unit", + "diabetes and endocrinology", + "emergency department", + "gastroenterology", + "gynaecology", + "haematology", + "haematology and haematological oncology", + "haematology day unit", + "hepatobillary and gastrointestinal surgery regional referral centre", + "hepatology", + "infectious diseases", + "infusion services", + "inpatient occupational therapy", + "intermediate dependancy area", + "john farman intensive care unit", + "medical decisions unit", + "medicine for the elderly", + "neonatal unit", + "neurology", + "neurology neurosurgery", + "neurosciences", + "neurosciences critical care unit", + "oncology", + "oral and maxillofacial surgery and ent", + "paediatric clinic", + "paediatric day unit", + "paediatric intensive care unit", + "plastic and vascular surgery plastics", + "psychiatry", + "respiratory medicine", + "surgery", + "teenage cancer trust 
unit", + "transitional care", + "transplant high dependency unit", + "trauma and orthopaedics", + "trauma high dependency unit", + "urology" + ], + "is_categorical": true + } + ], + "description": "making you feel better when you are ill", + "intents": [ + { + "name": "find_hospital", + "description": "search for a medical facility or a doctor", + "is_transactional": false, + "required_slots": [], + "optional_slots": { + "hospital-department": "dontcare" + } + } + ] + }, + { + "service_name": "taxi", + "slots": [ + { + "name": "taxi-leaveat", + "description": "the time you want to depart", + "possible_values": [], + "is_categorical": false + }, + { + "name": "taxi-destination", + "description": "the place you want to get to", + "possible_values": [], + "is_categorical": false + }, + { + "name": "taxi-departure", + "description": "the place you want to board the taxi", + "possible_values": [], + "is_categorical": false + }, + { + "name": "taxi-arriveby", + "description": "the time of your arrival at the destination", + "possible_values": [], + "is_categorical": false + } + ], + "description": "rent cheap cabs to avoid traffic", + "intents": [ + { + "name": "find_taxi", + "description": "search for taxis to avoid traffic", + "is_transactional": false, + "required_slots": [], + "optional_slots": { + "taxi-leaveat": "dontcare", + "taxi-destination": "dontcare", + "taxi-departure": "dontcare", + "taxi-arriveby": "dontcare" + } + } + ] + }, + { + "service_name": "bus", + "slots": [ + { + "name": "bus-departure", + "description": "the departure place of the bus", + "possible_values": [ + "cambridge" + ], + "is_categorical": true + }, + { + "name": "bus-destination", + "description": "the destination of the bus", + "possible_values": [ + "bishops stortford", + "cambridge", + "kohinoor", + "london kings cross" + ], + "is_categorical": true + }, + { + "name": "bus-leaveat", + "description": "the time when bus leaves", + "possible_values": [ + "21:45" + ], + "is_categorical": true + }, + { + "name": "bus-day", + "description": "the day of the bus", + "possible_values": [ + "wednesday" + ], + "is_categorical": true + } + ], + "description": "Bus service for traveling", + "intents": [ + { + "name": "find_bus", + "description": "search for a bus", + "is_transactional": false, + "required_slots": [], + "optional_slots": { + "bus-departure": "dontcare", + "bus-destination": "dontcare", + "bus-day": "dontcare", + "taxi-leaveat": "dontcare" + } + } + ] + } +] diff --git a/examples/nlp/scripts/multiwoz/replacements.txt b/examples/nlp/dialogue_state_tracking/data/multiwoz_mapping.pair similarity index 98% rename from examples/nlp/scripts/multiwoz/replacements.txt rename to examples/nlp/dialogue_state_tracking/data/multiwoz_mapping.pair index 34df41d01e93..dd15989b46b5 100644 --- a/examples/nlp/scripts/multiwoz/replacements.txt +++ b/examples/nlp/dialogue_state_tracking/data/multiwoz_mapping.pair @@ -80,4 +80,4 @@ ten 10 eleven 11 twelve 12 anywhere any where -good bye goodbye +good bye goodbye \ No newline at end of file diff --git a/examples/nlp/dialogue_state_tracking/data/process_multiwoz.py b/examples/nlp/dialogue_state_tracking/data/process_multiwoz.py new file mode 100644 index 000000000000..e731139811a7 --- /dev/null +++ b/examples/nlp/dialogue_state_tracking/data/process_multiwoz.py @@ -0,0 +1,479 @@ +7 #!/usr/bin/python + +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +# ============================================================================= +# Copyright 2019 Salesforce Research and Paweł Budzianowski. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom +# the Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +# THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# ============================================================================= + +""" +Dataset: http://dialogue.mi.eng.cam.ac.uk/index.php/corpus/ + +Code adopted from: +https://github.com/jasonwu0731/trade-dst +https://github.com/budzianowski/multiwoz + +This script can be used to process and import MultiWOZ 2.0 and 2.1 datasets. 
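A typical invocation looks like this (paths are placeholders only; run it from the
directory that contains multiwoz_mapping.pair):

    python process_multiwoz.py \
        --source_data_dir=<path_to_raw_multiwoz> \
        --target_data_dir=multiwoz2.1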
+You may find more information on how to use this example in NeMo's documentation: +https://nvidia.github.io/NeMo/nlp/dialogue_state_tracking_trade.html +""" + +import argparse +import json +import os +import re +import shutil +from os.path import exists + +from nemo.collections.nlp.data.datasets.datasets_utils import if_exist + +# GLOBAL VARIABLES +DICT_SIZE = 400 +MAX_LENGTH = 50 +IGNORE_KEYS_IN_GOAL = ['eod', 'topic', 'messageLen', 'message'] + +# List of the domains to process +DOMAINS = [u'taxi', u'restaurant', u'hospital', u'hotel', u'attraction', u'train', u'police'] + + +def is_ascii(s): + return all(ord(c) < 128 for c in s) + + +def insertSpace(token, text): + sidx = 0 + while True: + sidx = text.find(token, sidx) + if sidx == -1: + break + if sidx + 1 < len(text) and re.match('[0-9]', text[sidx - 1]) and re.match('[0-9]', text[sidx + 1]): + sidx += 1 + continue + if text[sidx - 1] != ' ': + text = text[:sidx] + ' ' + text[sidx:] + sidx += 1 + if sidx + len(token) < len(text) and text[sidx + len(token)] != ' ': + text = text[: sidx + 1] + ' ' + text[sidx + 1 :] + sidx += 1 + return text + + +def normalize(text): + # lower case every word + text = text.lower() + + # replace white spaces in front and end + text = re.sub(r'^\s*|\s*$', '', text) + + # hotel domain pfb30 + text = re.sub(r"b&b", "bed and breakfast", text) + text = re.sub(r"b and b", "bed and breakfast", text) + + # weird unicode bug + text = re.sub(u"(\u2018|\u2019)", "'", text) + + # replace st. + text = text.replace(';', ',') + text = re.sub('$\/', '', text) + text = text.replace('/', ' and ') + + # replace other special characters + text = text.replace('-', ' ') + text = re.sub('[\"\<>@\(\)]', '', text) # remove + + # insert white space before and after tokens: + for token in ['?', '.', ',', '!']: + text = insertSpace(token, text) + + # insert white space for 's + text = insertSpace('\'s', text) + + # replace it's, does't, you'd ... 
etc + text = re.sub('^\'', '', text) + text = re.sub('\'$', '', text) + text = re.sub('\'\s', ' ', text) + text = re.sub('\s\'', ' ', text) + for fromx, tox in REPLACEMENTS: + text = ' ' + text + ' ' + text = text.replace(fromx, tox)[1:-1] + + # remove multiple spaces + text = re.sub(' +', ' ', text) + + # concatenate numbers + tokens = text.split() + i = 1 + while i < len(tokens): + if re.match(u'^\d+$', tokens[i]) and re.match(u'\d+$', tokens[i - 1]): + tokens[i - 1] += tokens[i] + del tokens[i] + else: + i += 1 + text = ' '.join(tokens) + + return text + + +def fixDelex(filename, data, data2, idx, idx_acts): + """Given system dialogue acts fix automatic delexicalization.""" + try: + turn = data2[filename.strip('.json')][str(idx_acts)] + except: + return data + + if not isinstance(turn, str): + for k, act in turn.items(): + if 'Attraction' in k: + if 'restaurant_' in data['log'][idx]['text']: + data['log'][idx]['text'] = data['log'][idx]['text'].replace("restaurant", "attraction") + if 'hotel_' in data['log'][idx]['text']: + data['log'][idx]['text'] = data['log'][idx]['text'].replace("hotel", "attraction") + if 'Hotel' in k: + if 'attraction_' in data['log'][idx]['text']: + data['log'][idx]['text'] = data['log'][idx]['text'].replace("attraction", "hotel") + if 'restaurant_' in data['log'][idx]['text']: + data['log'][idx]['text'] = data['log'][idx]['text'].replace("restaurant", "hotel") + if 'Restaurant' in k: + if 'attraction_' in data['log'][idx]['text']: + data['log'][idx]['text'] = data['log'][idx]['text'].replace("attraction", "restaurant") + if 'hotel_' in data['log'][idx]['text']: + data['log'][idx]['text'] = data['log'][idx]['text'].replace("hotel", "restaurant") + + return data + + +def getDialogueAct(filename, data, data2, idx, idx_acts): + """Given system dialogue acts fix automatic delexicalization.""" + acts = [] + try: + turn = data2[filename.strip('.json')][str(idx_acts)] + except: + return acts + + if not isinstance(turn, str): + for k in turn.keys(): + if k.split('-')[1].lower() == 'request': + for a in turn[k]: + acts.append(a[0].lower()) + elif k.split('-')[1].lower() == 'inform': + for a in turn[k]: + acts.append([a[0].lower(), normalize(a[1].lower())]) + return acts + + +def get_summary_bstate(bstate, get_domain=False): + """Based on the mturk annotations we form multi-domain belief state""" + summary_bstate = [] + summary_bvalue = [] + active_domain = [] + for domain in DOMAINS: + domain_active = False + + booking = [] + for slot in sorted(bstate[domain]['book'].keys()): + if slot == 'booked': + if len(bstate[domain]['book']['booked']) != 0: + booking.append(1) + else: + booking.append(0) + else: + if bstate[domain]['book'][slot] != "": + booking.append(1) + summary_bvalue.append( + [ + "{}-book {}".format(domain, slot.strip().lower()), + normalize(bstate[domain]['book'][slot].strip().lower()), + ] + ) + else: + booking.append(0) + if domain == 'train': + if 'people' not in bstate[domain]['book'].keys(): + booking.append(0) + if 'ticket' not in bstate[domain]['book'].keys(): + booking.append(0) + summary_bstate += booking + + for slot in bstate[domain]['semi']: + slot_enc = [0, 0, 0] # not mentioned, dontcare, filled + if bstate[domain]['semi'][slot] == 'not mentioned': + slot_enc[0] = 1 + elif bstate[domain]['semi'][slot] in ['dont care', 'dontcare', "don't care", "do not care"]: + slot_enc[1] = 1 + summary_bvalue.append(["{}-{}".format(domain, slot.strip().lower()), "dontcare"]) + elif bstate[domain]['semi'][slot]: + summary_bvalue.append( + [ + 
"{}-{}".format(domain, slot.strip().lower()), + normalize(bstate[domain]['semi'][slot].strip().lower()), + ] + ) + if slot_enc != [0, 0, 0]: + domain_active = True + summary_bstate += slot_enc + + # quasi domain-tracker + if domain_active: + summary_bstate += [1] + active_domain.append(domain) + else: + summary_bstate += [0] + + assert len(summary_bstate) == 94 + if get_domain: + return active_domain + else: + return summary_bstate, summary_bvalue + + +def analyze_dialogue(dialogue, maxlen): + """Cleaning procedure for all kinds of errors in text and annotation.""" + d = dialogue + # do all the necessary postprocessing + if len(d['log']) % 2 != 0: + print('odd # of turns') + return None # odd number of turns, wrong dialogue + d_pp = {} + d_pp['goal'] = d['goal'] # for now we just copy the goal + usr_turns = [] + sys_turns = [] + # last_bvs = [] + for i in range(len(d['log'])): + if len(d['log'][i]['text'].split()) > maxlen: + return None # too long sentence, wrong dialogue + if i % 2 == 0: # usr turn + text = d['log'][i]['text'] + if not is_ascii(text): + return None + usr_turns.append(d['log'][i]) + else: # sys turn + text = d['log'][i]['text'] + if not is_ascii(text): + return None + belief_summary, belief_value_summary = get_summary_bstate(d['log'][i]['metadata']) + d['log'][i]['belief_summary'] = str(belief_summary) + d['log'][i]['belief_value_summary'] = belief_value_summary + sys_turns.append(d['log'][i]) + d_pp['usr_log'] = usr_turns + d_pp['sys_log'] = sys_turns + + return d_pp + + +def get_dial(dialogue): + """Extract a dialogue from the file""" + dial = [] + d_orig = analyze_dialogue(dialogue, MAX_LENGTH) # max turn len is 50 words + if d_orig is None: + return None + usr = [t['text'] for t in d_orig['usr_log']] + sys = [t['text'] for t in d_orig['sys_log']] + sys_a = [t['dialogue_acts'] for t in d_orig['sys_log']] + bvs = [t['belief_value_summary'] for t in d_orig['sys_log']] + domain = [t['domain'] for t in d_orig['usr_log']] + for item in zip(usr, sys, sys_a, domain, bvs): + dial.append({'usr': item[0], 'sys': item[1], 'sys_a': item[2], 'domain': item[3], 'bvs': item[4]}) + return dial + + +def getDomain(idx, log, domains, last_domain): + if idx == 1: + active_domains = get_summary_bstate(log[idx]["metadata"], True) + crnt_doms = active_domains[0] if len(active_domains) != 0 else domains[0] + return crnt_doms + else: + ds_diff = get_ds_diff(log[idx - 2]["metadata"], log[idx]["metadata"]) + if len(ds_diff.keys()) == 0: # no clues from dialog states + crnt_doms = last_domain + else: + crnt_doms = list(ds_diff.keys()) + return crnt_doms[0] + + +def get_ds_diff(prev_d, crnt_d): + diff = {} + if not prev_d or not crnt_d: + return diff + + for ((k1, v1), (k2, v2)) in zip(prev_d.items(), crnt_d.items()): + assert k1 == k2 + if v1 != v2: # updated + diff[k2] = v2 + return diff + + +def createData(source_data_dir): + + data = json.load(open(f'{source_data_dir}/data.json', 'r')) + data2 = json.load(open(f'{source_data_dir}/dialogue_acts.json', 'r')) + + delex_data = {} + + for didx, dialogue_name in enumerate(data): + + dialogue = data[dialogue_name] + + domains = [] + for dom_k, dom_v in dialogue['goal'].items(): + if dom_v and dom_k not in IGNORE_KEYS_IN_GOAL: # check whether contains some goal entities + domains.append(dom_k) + + idx_acts = 1 + last_domain, last_slot_fill = "", [] + for idx, turn in enumerate(dialogue['log']): + origin_text = normalize(turn['text']) + dialogue['log'][idx]['text'] = origin_text + + if idx % 2 == 1: # if it's a system turn + + cur_domain = 
getDomain(idx, dialogue['log'], domains, last_domain) + last_domain = [cur_domain] + + dialogue['log'][idx - 1]['domain'] = cur_domain + dialogue['log'][idx]['dialogue_acts'] = getDialogueAct(dialogue_name, dialogue, data2, idx, idx_acts) + idx_acts += 1 + + # FIXING delexicalization: + dialogue = fixDelex(dialogue_name, dialogue, data2, idx, idx_acts) + + delex_data[dialogue_name] = dialogue + + return delex_data + + +def divideData(data, infold, outfold): + """Given test and validation sets, divide + the data for three different sets""" + + os.makedirs(outfold, exist_ok=True) + shutil.copyfile(f'{infold}/ontology.json', f'{outfold}/ontology.json') + + testListFile = [] + fin = open(f'{infold}/testListFile.json', 'r') + for line in fin: + testListFile.append(line[:-1]) + fin.close() + + valListFile = [] + fin = open(f'{infold}/valListFile.json', 'r') + for line in fin: + valListFile.append(line[:-1]) + fin.close() + + test_dials = [] + val_dials = [] + train_dials = [] + + count_train, count_val, count_test = 0, 0, 0 + + for dialogue_name in data: + dial_item = data[dialogue_name] + domains = [] + for dom_k, dom_v in dial_item['goal'].items(): + if dom_v and dom_k not in IGNORE_KEYS_IN_GOAL: # check whether contains some goal entities + domains.append(dom_k) + + dial = get_dial(data[dialogue_name]) + if dial: + dialogue = {} + dialogue['dialogue_idx'] = dialogue_name + dialogue['domains'] = list(set(domains)) + last_bs = [] + dialogue['dialogue'] = [] + + for turn_i, turn in enumerate(dial): + # usr, usr_o, sys, sys_o, sys_a, domain + turn_dialog = {} + turn_dialog['system_transcript'] = dial[turn_i - 1]['sys'] if turn_i > 0 else "" + turn_dialog['turn_idx'] = turn_i + turn_dialog['belief_state'] = [{"slots": [s], "act": "inform"} for s in turn['bvs']] + turn_dialog['turn_label'] = [bs["slots"][0] for bs in turn_dialog['belief_state'] if bs not in last_bs] + turn_dialog['transcript'] = turn['usr'] + turn_dialog['system_acts'] = dial[turn_i - 1]['sys_a'] if turn_i > 0 else [] + turn_dialog['domain'] = turn['domain'] + last_bs = turn_dialog['belief_state'] + dialogue['dialogue'].append(turn_dialog) + + if dialogue_name in testListFile: + test_dials.append(dialogue) + count_test += 1 + elif dialogue_name in valListFile: + val_dials.append(dialogue) + count_val += 1 + else: + train_dials.append(dialogue) + count_train += 1 + + # save all dialogues + with open(f'{outfold}/dev_dials.json', 'w') as f: + json.dump(val_dials, f, indent=4) + + with open(f'{outfold}/test_dials.json', 'w') as f: + json.dump(test_dials, f, indent=4) + + with open(f'{outfold}/train_dials.json', 'w') as f: + json.dump(train_dials, f, indent=4) + + print(f"Saving done. Generated dialogs: {count_train} train, {count_val} val, {count_test} test.") + + +if __name__ == "__main__": + # Parse the command-line arguments. + parser = argparse.ArgumentParser(description='Process MultiWOZ dataset') + parser.add_argument( + "--source_data_dir", required=True, type=str, help='The path to the folder containing the MultiWOZ data files.' 
+ ) + parser.add_argument("--target_data_dir", default='multiwoz2.1/', type=str) + args = parser.parse_args() + + if not exists(args.source_data_dir): + raise FileNotFoundError(f"{args.source_data_dir} does not exist.") + + # Check if the files exist + if if_exist(args.target_data_dir, ['ontology.json', 'dev_dials.json', 'test_dials.json', 'train_dials.json']): + print(f'Data is already processed and stored at {args.source_data_dir}, skipping pre-processing.') + exit(0) + + fin = open('multiwoz_mapping.pair', 'r') + REPLACEMENTS = [] + for line in fin.readlines(): + tok_from, tok_to = line.replace('\n', '').split('\t') + REPLACEMENTS.append((' ' + tok_from + ' ', ' ' + tok_to + ' ')) + + print('Creating dialogues...') + # Process MultiWOZ dataset + delex_data = createData(args.source_data_dir) + # Divide data + divideData(delex_data, args.source_data_dir, args.target_data_dir) diff --git a/examples/nlp/dialogue_state_tracking/dialogue_state_tracking_sgd.py b/examples/nlp/dialogue_state_tracking/dialogue_state_tracking_sgd.py new file mode 100644 index 000000000000..300544b422e5 --- /dev/null +++ b/examples/nlp/dialogue_state_tracking/dialogue_state_tracking_sgd.py @@ -0,0 +1,465 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# Copyright 2019 The Google Research Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +''' +This file contains code artifacts adapted from the original implementation: +https://github.com/google-research/google-research/blob/master/schema_guided_dst/baseline/train_and_predict.py +''' + +import argparse +import math +import os + +import nemo.collections.nlp as nemo_nlp +import nemo.collections.nlp.data.datasets.sgd_dataset.data_processor as data_processor +from nemo.collections.nlp.callbacks.sgd_callback import eval_epochs_done_callback, eval_iter_callback +from nemo.collections.nlp.data.datasets.sgd_dataset.schema_processor import SchemaPreprocessor +from nemo.collections.nlp.nm.trainables import SGDDecoderNM, SGDEncoderNM +from nemo.core import Backend, CheckpointCallback, EvaluatorCallback, NeuralModuleFactory, SimpleLossLoggerCallback +from nemo.utils import logging +from nemo.utils.lr_policies import get_lr_policy + +# Parsing arguments +parser = argparse.ArgumentParser(description='Schema_guided_dst') + +# BERT based utterance encoder related arguments +parser.add_argument( + "--max_seq_length", + default=80, + type=int, + help="The maximum total input sequence length after WordPiece tokenization. 
" + "Sequences longer than this will be truncated, and sequences shorter " + "than this will be padded.", +) +parser.add_argument("--dropout", default=0.1, type=float, help="Dropout rate for BERT representations.") +parser.add_argument( + "--pretrained_model_name", + default="bert-base-cased", + type=str, + help="Name of the pre-trained model", + choices=nemo_nlp.nm.trainables.get_pretrained_lm_models_list(), +) +parser.add_argument("--bert_checkpoint", default=None, type=str, help="Path to model checkpoint") +parser.add_argument("--bert_config", default=None, type=str, help="Path to bert config file in json format") +parser.add_argument( + "--tokenizer_model", + default=None, + type=str, + help="Path to pretrained tokenizer model, only used if --tokenizer is sentencepiece", +) +parser.add_argument( + "--tokenizer", + default="nemobert", + type=str, + choices=["nemobert", "sentencepiece"], + help="tokenizer to use, only relevant when using custom pretrained checkpoint.", +) +parser.add_argument("--vocab_file", default=None, help="Path to the vocab file.") +parser.add_argument( + "--do_lower_case", + action='store_true', + help="Whether to lower case the input text. True for uncased models, False for cased models. " + + "Only applicable when tokenizer is build with vocab file", +) + +# Hyperparameters and optimization related flags. +parser.add_argument( + "--checkpoint_dir", + default=None, + type=str, + help="The folder containing the checkpoints for the model to continue training", +) +parser.add_argument("--train_batch_size", default=32, type=int, help="Total batch size for training.") +parser.add_argument("--eval_batch_size", default=8, type=int, help="Total batch size for eval.") +parser.add_argument("--num_epochs", default=80, type=int, help="Total number of training epochs to perform.") + +parser.add_argument("--optimizer_kind", default="adam_w", type=str) +parser.add_argument("--learning_rate", default=1e-4, type=float, help="The initial learning rate for Adam.") +parser.add_argument("--lr_policy", default="PolynomialDecayAnnealing", type=str) +parser.add_argument("--weight_decay", default=0.01, type=float) +parser.add_argument( + "--lr_warmup_proportion", + default=0.1, + type=float, + help="Proportion of training to perform linear learning rate warmup for. " "E.g., 0.1 = 10% of training.", +) +parser.add_argument("--grad_norm_clip", type=float, default=1, help="Gradient clipping") +parser.add_argument("--local_rank", default=None, type=int) +parser.add_argument("--amp_opt_level", default="O0", type=str, choices=["O0", "O1", "O2"]) +parser.add_argument("--num_gpus", default=1, type=int) + +# Input and output paths and other flags. 
+parser.add_argument( + "--task_name", + default="sgd_single_domain", + type=str, + choices=data_processor.FILE_RANGES.keys(), + help="The name of the task to train.", +) +parser.add_argument( + "--data_dir", + type=str, + required=True, + help="Directory for the downloaded SGD data, which contains the dialogue files" + " and schema files of all datasets (eg train, dev)", +) +parser.add_argument( + "--work_dir", + type=str, + default="output/SGD", + help="The output directory where the model checkpoints will be written.", +) +parser.add_argument( + "--schema_embedding_dir", + type=str, + default='schema_embedding_dir', + help="Directory where .npy file for embedding of entities (slots, values, intents) in the dataset_split's schema are stored.", +) +parser.add_argument( + "--no_overwrite_schema_emb_files", + action="store_false", + help="Whether to generate a new file saving the dialogue examples.", + dest="overwrite_schema_emb_files", +) +parser.add_argument( + "--joint_acc_across_turn", + action="store_true", + help="Whether to compute joint accuracy across turn instead of across service. Should be set to True when conducting multiwoz style evaluation.", +) +parser.add_argument( + "--no_fuzzy_match", + action="store_true", + help="Whether to use fuzzy string matching when comparing non-categorical slot values. Fuzz match should not be used when conducting multiwoz style evaluation.", +) +parser.add_argument( + "--dialogues_example_dir", + type=str, + default="dialogues_example_dir", + help="Directory where preprocessed SGD dialogues are stored.", +) +parser.add_argument( + "--no_overwrite_dial_files", + action="store_false", + help="Whether to generate a new file saving the dialogue examples.", + dest="overwrite_dial_files", +) +parser.add_argument("--no_shuffle", action="store_true", help="Whether to shuffle training data") +parser.add_argument("--no_time_to_log_dir", action="store_true", help="whether to add time to work_dir or not") +parser.add_argument( + "--eval_dataset", + type=str, + default="dev_test", + choices=["dev", "test", "dev_test"], + help="Dataset splits for evaluation.", +) +parser.add_argument( + "--save_epoch_freq", + default=1, + type=int, + help="Frequency of saving checkpoint '-1' - step checkpoint won't be saved", +) +parser.add_argument( + "--save_step_freq", + default=-1, + type=int, + help="Frequency of saving checkpoint '-1' - step checkpoint won't be saved", +) + +parser.add_argument( + "--loss_log_freq", default=-1, type=int, help="Frequency of logging loss values, '-1' - at the end of the epoch", +) + +parser.add_argument( + "--loss_reduction", + default='mean', + type=str, + help="specifies the reduction to apply to the final loss, choose 'mean' or 'sum'", +) + +parser.add_argument( + "--eval_epoch_freq", default=1, type=int, help="Frequency of evaluation", +) + +parser.add_argument( + "--num_workers", + default=2, + type=int, + help="Number of workers for data loading, -1 means set it automatically to the number of CPU cores", +) + +parser.add_argument( + "--enable_pin_memory", action="store_true", help="Enables the pin_memory feature of Pytroch's DataLoader", +) + +parser.add_argument( + "--state_tracker", + type=str, + default='baseline', + choices=['baseline', 'ret_sys_act'], + help="Specifies the state tracker mode", +) +parser.add_argument( + "--schema_emb_init", + type=str, + default='baseline', + choices=['baseline', 'random', 'last_layer_average'], + help="Specifies how schema embeddings are generated. 
Baseline uses ['CLS'] token", +) +parser.add_argument( + "--train_schema_emb", action="store_true", help="Specifies whether schema embeddings are trainables.", +) +parser.add_argument( + "--add_attention_head", + action="store_true", + help="Whether to use attention when computing projections. When False, uses linear projection.", +) +parser.add_argument( + "--debug_mode", action="store_true", help="Enables debug mode with more info on data preprocessing and evaluation", +) + +parser.add_argument( + "--checkpoints_to_keep", default=1, type=int, help="The number of last checkpoints to keep", +) + +args = parser.parse_args() +logging.info(args) + +if args.debug_mode: + logging.setLevel("DEBUG") + +if args.task_name == "multiwoz": + schema_config = { + "MAX_NUM_CAT_SLOT": 9, + "MAX_NUM_NONCAT_SLOT": 4, + "MAX_NUM_VALUE_PER_CAT_SLOT": 47, + "MAX_NUM_INTENT": 1, + } +else: + schema_config = { + "MAX_NUM_CAT_SLOT": 6, + "MAX_NUM_NONCAT_SLOT": 12, + "MAX_NUM_VALUE_PER_CAT_SLOT": 12, + "MAX_NUM_INTENT": 4, + } + +if not os.path.exists(args.data_dir): + raise ValueError(f'Data not found at {args.data_dir}') + +nf = NeuralModuleFactory( + backend=Backend.PyTorch, + local_rank=args.local_rank, + optimization_level=args.amp_opt_level, + log_dir=args.work_dir, + create_tb_writer=True, + checkpoint_dir=args.checkpoint_dir, + files_to_copy=[__file__], + add_time_to_log_dir=not args.no_time_to_log_dir, +) + +pretrained_bert_model = nemo_nlp.nm.trainables.get_pretrained_lm_model( + pretrained_model_name=args.pretrained_model_name, + config=args.bert_config, + vocab=args.vocab_file, + checkpoint=args.bert_checkpoint, +) + +schema_config["EMBEDDING_DIMENSION"] = pretrained_bert_model.hidden_size +schema_config["MAX_SEQ_LENGTH"] = args.max_seq_length + +tokenizer = nemo_nlp.data.tokenizers.get_tokenizer( + tokenizer_name=args.tokenizer, + pretrained_model_name=args.pretrained_model_name, + tokenizer_model=args.tokenizer_model, + vocab_file=args.vocab_file, + do_lower_case=args.do_lower_case, +) + +hidden_size = pretrained_bert_model.hidden_size + +# Run SGD preprocessor to generate and store schema embeddings +schema_preprocessor = SchemaPreprocessor( + data_dir=args.data_dir, + schema_embedding_dir=args.schema_embedding_dir, + schema_config=schema_config, + tokenizer=tokenizer, + bert_model=pretrained_bert_model, + overwrite_schema_emb_files=args.overwrite_schema_emb_files, + bert_ckpt_dir=args.checkpoint_dir, + nf=nf, + mode=args.schema_emb_init, + is_trainable=args.train_schema_emb, +) + +dialogues_processor = data_processor.SGDDataProcessor( + task_name=args.task_name, + data_dir=args.data_dir, + dialogues_example_dir=args.dialogues_example_dir, + tokenizer=tokenizer, + schema_emb_processor=schema_preprocessor, + overwrite_dial_files=args.overwrite_dial_files, +) + +# define model pipeline +sgd_encoder = SGDEncoderNM(hidden_size=hidden_size, dropout=args.dropout) +sgd_decoder = SGDDecoderNM( + embedding_dim=hidden_size, schema_emb_processor=schema_preprocessor, add_attention_head=args.add_attention_head +) +dst_loss = nemo_nlp.nm.losses.SGDDialogueStateLossNM(reduction=args.loss_reduction) + + +def create_pipeline(dataset_split='train'): + datalayer = nemo_nlp.nm.data_layers.SGDDataLayer( + dataset_split=dataset_split, + dialogues_processor=dialogues_processor, + batch_size=args.train_batch_size, + shuffle=not args.no_shuffle if dataset_split == 'train' else False, + num_workers=args.num_workers, + pin_memory=args.enable_pin_memory, + ) + data = datalayer() + + # Encode the utterances using BERT. 
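# Editor's note (shapes assumed, not asserted by the patch): utterance_ids is
# [batch_size, max_seq_length]; the BERT call below yields token embeddings of
# [batch_size, max_seq_length, hidden_size], which sgd_encoder pools into a single
# encoded_utterance vector per example for the downstream SGD decoder heads.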
+ token_embeddings = pretrained_bert_model( + input_ids=data.utterance_ids, attention_mask=data.utterance_mask, token_type_ids=data.utterance_segment, + ) + encoded_utterance, token_embeddings = sgd_encoder(hidden_states=token_embeddings) + ( + logit_intent_status, + logit_req_slot_status, + logit_cat_slot_status, + logit_cat_slot_value, + logit_noncat_slot_status, + logit_noncat_slot_start, + logit_noncat_slot_end, + ) = sgd_decoder( + encoded_utterance=encoded_utterance, + token_embeddings=token_embeddings, + utterance_mask=data.utterance_mask, + cat_slot_values_mask=data.cat_slot_values_mask, + intent_status_mask=data.intent_status_mask, + service_ids=data.service_id, + ) + + if dataset_split == 'train': + loss = dst_loss( + logit_intent_status=logit_intent_status, + intent_status_labels=data.intent_status_labels, + logit_req_slot_status=logit_req_slot_status, + requested_slot_status=data.requested_slot_status, + req_slot_mask=data.req_slot_mask, + logit_cat_slot_status=logit_cat_slot_status, + categorical_slot_status=data.categorical_slot_status, + cat_slot_status_mask=data.cat_slot_status_mask, + logit_cat_slot_value=logit_cat_slot_value, + categorical_slot_values=data.categorical_slot_values, + logit_noncat_slot_status=logit_noncat_slot_status, + noncategorical_slot_status=data.noncategorical_slot_status, + noncat_slot_status_mask=data.noncat_slot_status_mask, + logit_noncat_slot_start=logit_noncat_slot_start, + logit_noncat_slot_end=logit_noncat_slot_end, + noncategorical_slot_value_start=data.noncategorical_slot_value_start, + noncategorical_slot_value_end=data.noncategorical_slot_value_end, + ) + tensors = [loss] + else: + tensors = [ + data.example_id_num, + data.service_id, + data.is_real_example, + data.start_char_idx, + data.end_char_idx, + logit_intent_status, + logit_req_slot_status, + logit_cat_slot_status, + logit_cat_slot_value, + logit_noncat_slot_status, + logit_noncat_slot_start, + logit_noncat_slot_end, + data.intent_status_labels, + data.requested_slot_status, + data.categorical_slot_status, + data.categorical_slot_values, + data.noncategorical_slot_status, + ] + + steps_per_epoch = math.ceil(len(datalayer) / (args.train_batch_size * args.num_gpus)) + return steps_per_epoch, tensors + + +steps_per_epoch, train_tensors = create_pipeline() +logging.info(f'Steps per epoch: {steps_per_epoch}') + +# Create trainer and execute training action +train_callback = SimpleLossLoggerCallback( + tensors=train_tensors, + print_func=lambda x: logging.info("Loss: {:.8f}".format(x[0].item())), + get_tb_values=lambda x: [["loss", x[0]]], + tb_writer=nf.tb_writer, + step_freq=args.loss_log_freq if args.loss_log_freq > 0 else steps_per_epoch, +) + + +def get_eval_callback(eval_dataset): + _, eval_tensors = create_pipeline(dataset_split=eval_dataset) + eval_callback = EvaluatorCallback( + eval_tensors=eval_tensors, + user_iter_callback=lambda x, y: eval_iter_callback(x, y, schema_preprocessor, eval_dataset), + user_epochs_done_callback=lambda x: eval_epochs_done_callback( + x, + args.task_name, + eval_dataset, + args.data_dir, + nf.work_dir, + args.state_tracker, + args.debug_mode, + dialogues_processor, + schema_preprocessor, + args.joint_acc_across_turn, + args.no_fuzzy_match, + ), + tb_writer=nf.tb_writer, + eval_step=args.eval_epoch_freq * steps_per_epoch, + ) + return eval_callback + + +if args.eval_dataset == 'dev_test': + eval_callbacks = [get_eval_callback('dev'), get_eval_callback('test')] +else: + eval_callbacks = [get_eval_callback(args.eval_dataset)] + +ckpt_callback = 
CheckpointCallback( + folder=nf.checkpoint_dir, epoch_freq=args.save_epoch_freq, step_freq=args.save_step_freq, checkpoints_to_keep=1 +) + +lr_policy_fn = get_lr_policy( + args.lr_policy, total_steps=args.num_epochs * steps_per_epoch, warmup_ratio=args.lr_warmup_proportion +) + +nf.train( + tensors_to_optimize=train_tensors, + callbacks=[train_callback, ckpt_callback] + eval_callbacks, + lr_policy=lr_policy_fn, + optimizer=args.optimizer_kind, + optimization_params={ + "num_epochs": args.num_epochs, + "lr": args.learning_rate, + "eps": 1e-6, + "weight_decay": args.weight_decay, + "grad_norm_clip": args.grad_norm_clip, + }, +) diff --git a/examples/nlp/dialogue_state_tracking/dialogue_state_tracking_trade.py b/examples/nlp/dialogue_state_tracking/dialogue_state_tracking_trade.py index 9fc7a6ca7f29..2059a843de29 100644 --- a/examples/nlp/dialogue_state_tracking/dialogue_state_tracking_trade.py +++ b/examples/nlp/dialogue_state_tracking/dialogue_state_tracking_trade.py @@ -14,63 +14,78 @@ # limitations under the License. # ============================================================================= -""" An implementation of the paper "Transferable Multi-Domain State Generator +""" +An implementation of TRADE model introduced by the paper "Transferable Multi-Domain State Generator for Task-Oriented Dialogue Systems" (Wu et al., 2019 - ACL 2019) Adopted from: https://github.com/jasonwu0731/trade-dst + +TRADE is a state tracking model for goal-oriented dialogue systems. +You may find more information on how to use this example in NeMo's documentation: +https://nvidia.github.io/NeMo/nlp/dialogue_state_tracking_trade.html """ import argparse import math -import os +from os.path import exists -import numpy as np - -import nemo.collections.nlp as nemo_nlp import nemo.core as nemo_core from nemo import logging from nemo.backends.pytorch.common import EncoderRNN +from nemo.backends.pytorch.common.losses import CrossEntropyLossNM, LossAggregatorNM from nemo.collections.nlp.callbacks.state_tracking_trade_callback import eval_epochs_done_callback, eval_iter_callback -from nemo.collections.nlp.data.datasets.state_tracking_trade_dataset import MultiWOZDataDesc +from nemo.collections.nlp.data.datasets.multiwoz_dataset import MultiWOZDataDesc +from nemo.collections.nlp.nm.data_layers import MultiWOZDataLayer +from nemo.collections.nlp.nm.losses import MaskedLogLoss +from nemo.collections.nlp.nm.trainables import TRADEGenerator from nemo.utils.lr_policies import get_lr_policy -parser = argparse.ArgumentParser(description='Dialog state tracking with TRADE model on MultiWOZ dataset') -parser.add_argument("--local_rank", default=None, type=int) +parser = argparse.ArgumentParser(description='Dialogue state tracking with TRADE model on MultiWOZ dataset') +parser.add_argument("--data_dir", default='data/multiwoz2.1', type=str) +parser.add_argument("--work_dir", default='outputs', type=str) +parser.add_argument("--checkpoint_dir", default=None, type=str) + parser.add_argument("--batch_size", default=16, type=int) parser.add_argument("--eval_batch_size", default=16, type=int) -parser.add_argument("--num_gpus", default=1, type=int) parser.add_argument("--num_epochs", default=10, type=int) -parser.add_argument("--lr_warmup_proportion", default=0.0, type=float) + +parser.add_argument("--num_gpus", default=1, type=int) +parser.add_argument("--optimizer_kind", default="adam", type=str) parser.add_argument("--lr", default=0.001, type=float) -parser.add_argument("--lr_policy", default=None, type=str) 
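+# Note on the learning-rate flags added below: when --lr_policy is non-empty
+# (SquareAnnealing is the new default), the rate is annealed from --lr towards
+# --min_lr over the run, after an optional warm-up controlled by
+# --lr_warmup_proportion; an empty --lr_policy keeps a constant learning rate
+# (see the `if args.lr_policy:` handling further below).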
+parser.add_argument("--lr_warmup_proportion", default=0.0, type=float) +parser.add_argument("--lr_policy", default='SquareAnnealing', type=str) parser.add_argument("--min_lr", default=1e-4, type=float) parser.add_argument("--weight_decay", default=0.0, type=float) +parser.add_argument("--grad_norm_clip", type=float, default=10, help="gradient clipping") +parser.add_argument("--amp_opt_level", default="O0", type=str, choices=["O0", "O1", "O2"]) + parser.add_argument("--emb_dim", default=400, type=int) parser.add_argument("--hid_dim", default=400, type=int) parser.add_argument("--n_layers", default=1, type=int) parser.add_argument("--dropout", default=0.2, type=float) parser.add_argument("--input_dropout", default=0.2, type=float) -parser.add_argument("--data_dir", default='data/statetracking/multiwoz2.1', type=str) +parser.add_argument("--teacher_forcing", default=0.5, type=float) +parser.add_argument( + "--no_shuffle_data", action='store_false', dest="shuffle_data", help="Shuffle is enabled by default." +) + parser.add_argument("--train_file_prefix", default='train', type=str) parser.add_argument("--eval_file_prefix", default='test', type=str) -parser.add_argument("--work_dir", default='outputs', type=str) -parser.add_argument("--save_epoch_freq", default=-1, type=int) +parser.add_argument("--save_epoch_freq", default=1, type=int) parser.add_argument("--save_step_freq", default=-1, type=int) -parser.add_argument("--optimizer_kind", default="adam", type=str) -parser.add_argument("--amp_opt_level", default="O0", type=str, choices=["O0", "O1", "O2"]) -parser.add_argument("--shuffle_data", action='store_true') parser.add_argument("--num_train_samples", default=-1, type=int) parser.add_argument("--num_eval_samples", default=-1, type=int) -parser.add_argument("--grad_norm_clip", type=float, default=10, help="gradient clipping") -parser.add_argument("--teacher_forcing", default=0.5, type=float) +parser.add_argument("--local_rank", default=None, type=int) args = parser.parse_args() # List of the domains to be considered domains = {"attraction": 0, "restaurant": 1, "taxi": 2, "train": 3, "hotel": 4} -if not os.path.exists(args.data_dir): - raise ValueError(f'Data not found at {args.data_dir}') +# Check if data dir exists +if not exists(args.data_dir): + raise ValueError(f"Data folder `{args.data_dir}` not found") -work_dir = f'{args.work_dir}/DST_TRADE' +# Prepare the experiment output dir +logging.info(f"Logging the experiment to: {args.work_dir}") data_desc = MultiWOZDataDesc(args.data_dir, domains) @@ -78,7 +93,8 @@ backend=nemo_core.Backend.PyTorch, local_rank=args.local_rank, optimization_level=args.amp_opt_level, - log_dir=work_dir, + log_dir=args.work_dir, + checkpoint_dir=args.checkpoint_dir, create_tb_writer=True, files_to_copy=[__file__], add_time_to_log_dir=True, @@ -87,7 +103,7 @@ vocab_size = len(data_desc.vocab) encoder = EncoderRNN(vocab_size, args.emb_dim, args.hid_dim, args.dropout, args.n_layers) -decoder = nemo_nlp.nm.trainables.TRADEGenerator( +decoder = TRADEGenerator( data_desc.vocab, encoder.embedding, args.hid_dim, @@ -97,16 +113,16 @@ teacher_forcing=args.teacher_forcing, ) -gate_loss_fn = nemo_nlp.nm.losses.CrossEntropyLoss3D(num_classes=len(data_desc.gating_dict)) -ptr_loss_fn = nemo_nlp.nm.losses.TRADEMaskedCrossEntropy() -total_loss_fn = nemo_nlp.nm.losses.LossAggregatorNM(num_inputs=2) +gate_loss_fn = CrossEntropyLossNM(logits_ndim=3) +ptr_loss_fn = MaskedLogLoss() +total_loss_fn = LossAggregatorNM(num_inputs=2) def create_pipeline(num_samples, batch_size, num_gpus, 
input_dropout, data_prefix, is_training): logging.info(f"Loading {data_prefix} data...") shuffle = args.shuffle_data if is_training else False - data_layer = nemo_nlp.nm.data_layers.MultiWOZDataLayer( + data_layer = MultiWOZDataLayer( args.data_dir, data_desc.domains, all_domains=data_desc.all_domains, @@ -122,8 +138,7 @@ def create_pipeline(num_samples, batch_size, num_gpus, input_dropout, data_prefi input_dropout=input_dropout, ) - src_ids, src_lens, tgt_ids, tgt_lens, gate_labels, turn_domain = data_layer() - + input_data = data_layer() data_size = len(data_layer) logging.info(f'The length of data layer is {data_size}') @@ -135,20 +150,32 @@ def create_pipeline(num_samples, batch_size, num_gpus, input_dropout, data_prefi steps_per_epoch = math.ceil(data_size / (batch_size * num_gpus)) logging.info(f"Steps_per_epoch = {steps_per_epoch}") - outputs, hidden = encoder(inputs=src_ids, input_lens=src_lens) + outputs, hidden = encoder(inputs=input_data.src_ids, input_lens=input_data.src_lens) point_outputs, gate_outputs = decoder( - encoder_hidden=hidden, encoder_outputs=outputs, input_lens=src_lens, src_ids=src_ids, targets=tgt_ids + encoder_hidden=hidden, + encoder_outputs=outputs, + input_lens=input_data.src_lens, + src_ids=input_data.src_ids, + targets=input_data.tgt_ids, ) - gate_loss = gate_loss_fn(logits=gate_outputs, labels=gate_labels) - ptr_loss = ptr_loss_fn(logits=point_outputs, targets=tgt_ids, loss_mask=tgt_lens) + gate_loss = gate_loss_fn(logits=gate_outputs, labels=input_data.gating_labels) + ptr_loss = ptr_loss_fn(logits=point_outputs, labels=input_data.tgt_ids, length_mask=input_data.tgt_lens) total_loss = total_loss_fn(loss_1=gate_loss, loss_2=ptr_loss) if is_training: tensors_to_evaluate = [total_loss, gate_loss, ptr_loss] else: - tensors_to_evaluate = [total_loss, point_outputs, gate_outputs, gate_labels, turn_domain, tgt_ids, tgt_lens] + tensors_to_evaluate = [ + total_loss, + point_outputs, + gate_outputs, + input_data.gating_labels, + input_data.turn_domain, + input_data.tgt_ids, + input_data.tgt_lens, + ] return tensors_to_evaluate, total_loss, ptr_loss, gate_loss, steps_per_epoch, data_layer @@ -182,9 +209,9 @@ def create_pipeline(num_samples, batch_size, num_gpus, input_dropout, data_prefi train_callback = nemo_core.SimpleLossLoggerCallback( tensors=[total_loss_train, gate_loss_train, ptr_loss_train], print_func=lambda x: logging.info( - f'Loss:{str(np.round(x[0].item(), 3))}, ' - f'Gate Loss:{str(np.round(x[1].item(), 3))}, ' - f'Pointer Loss:{str(np.round(x[2].item(), 3))}' + f'Total Loss:{str(round(x[0].item(), 3))}, ' + f'Gate Loss:{str(round(x[1].item(), 3))}, ' + f'Pointer Loss:{str(round(x[2].item(), 3))}' ), tb_writer=nf.tb_writer, get_tb_values=lambda x: [["loss", x[0]], ["gate_loss", x[1]], ["pointer_loss", x[2]]], @@ -203,7 +230,7 @@ def create_pipeline(num_samples, batch_size, num_gpus, input_dropout, data_prefi folder=nf.checkpoint_dir, epoch_freq=args.save_epoch_freq, step_freq=args.save_step_freq ) -if args.lr_policy is not None: +if args.lr_policy: total_steps = args.num_epochs * steps_per_epoch_train lr_policy_fn = get_lr_policy( args.lr_policy, total_steps=total_steps, warmup_ratio=args.lr_warmup_proportion, min_lr=args.min_lr @@ -212,6 +239,7 @@ def create_pipeline(num_samples, batch_size, num_gpus, input_dropout, data_prefi lr_policy_fn = None grad_norm_clip = args.grad_norm_clip if args.grad_norm_clip > 0 else None + nf.train( tensors_to_optimize=[total_loss_train], callbacks=[eval_callback, train_callback, ckpt_callback], diff --git 
a/examples/nlp/glue_benchmark/glue_benchmark_with_bert.py b/examples/nlp/glue_benchmark/glue_benchmark_with_bert.py index 7b90c132a506..2158cd4c93a2 100644 --- a/examples/nlp/glue_benchmark/glue_benchmark_with_bert.py +++ b/examples/nlp/glue_benchmark/glue_benchmark_with_bert.py @@ -1,19 +1,22 @@ -""" -Copyright 2018 The Google AI Language Team Authors and -The HuggingFace Inc. team. -Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# Copyright 2018 The Google AI Language Team Authors and +# The HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= +""" Some transformer of this code were adapted from the HuggingFace library at https://github.com/huggingface/transformers @@ -28,6 +31,7 @@ --data_dir /path_to_data_dir/MRPC \ --task_name mrpc \ --work_dir /path_to_output_folder \ +--pretrained_model_name bert-base-uncased \ To run this example on 4 GPUs with mixed precision: python -m torch.distributed.launch \ @@ -37,6 +41,7 @@ --work_dir /path_to_output_folder \ --num_gpus=4 \ --amp_opt_level=O1 \ +--pretrained_model_name bert-base-uncased \ The generated predictions and associated labels will be stored in the word_dir in {task_name}.txt along with the checkpoints and tensorboard files. @@ -66,11 +71,11 @@ from transformers import BertConfig import nemo.collections.nlp as nemo_nlp +import nemo.collections.nlp.data.tokenizers.tokenizer_utils import nemo.core as nemo_core from nemo import logging -from nemo.backends.pytorch.common import CrossEntropyLoss, MSELoss +from nemo.backends.pytorch.common import CrossEntropyLossNM, MSELoss from nemo.collections.nlp.callbacks.glue_benchmark_callback import eval_epochs_done_callback, eval_iter_callback -from nemo.collections.nlp.data import NemoBertTokenizer, SentencePieceTokenizer from nemo.collections.nlp.data.datasets.glue_benchmark_dataset import output_modes, processors from nemo.collections.nlp.nm.data_layers import GlueClassificationDataLayer, GlueRegressionDataLayer from nemo.collections.nlp.nm.trainables import SequenceClassifier, SequenceRegression @@ -78,10 +83,9 @@ parser = argparse.ArgumentParser(description="GLUE_with_pretrained_BERT") -# Parsing arguments parser.add_argument( "--data_dir", - default='COLA', + default="COLA", type=str, required=True, help="The input data dir. 
Should contain the .tsv files (or other data files) for the task.", @@ -91,17 +95,21 @@ default="CoLA", type=str, required=True, - choices=['cola', 'sst-2', 'mrpc', 'sts-b', 'qqp', 'mnli', 'qnli', 'rte', 'wnli'], + choices=["cola", "sst-2", "mrpc", "sts-b", "qqp", "mnli", "qnli", "rte", "wnli"], help="GLUE task name, MNLI includes both matched and mismatched tasks", ) parser.add_argument( - "--pretrained_bert_model", default="bert-base-cased", type=str, help="Name of the pre-trained model" + "--pretrained_model_name", + default="bert-base-uncased", + type=str, + help="Name of the pre-trained model", + choices=nemo_nlp.nm.trainables.get_pretrained_lm_models_list(), ) parser.add_argument("--bert_checkpoint", default=None, type=str, help="Path to model checkpoint") parser.add_argument("--bert_config", default=None, type=str, help="Path to bert config file in json format") parser.add_argument( "--tokenizer_model", - default="tokenizer.model", + default=None, type=str, help="Path to pretrained tokenizer model, only used if --tokenizer is sentencepiece", ) @@ -112,6 +120,13 @@ choices=["nemobert", "sentencepiece"], help="tokenizer to use, only relevant when using custom pretrained checkpoint.", ) +parser.add_argument("--vocab_file", default=None, help="Path to the vocab file.") +parser.add_argument( + "--do_lower_case", + action='store_true', + help="Whether to lower case the input text. True for uncased models, False for cased models. " + + "Only applicable when tokenizer is build with vocab file", +) parser.add_argument( "--max_seq_length", default=128, @@ -129,15 +144,22 @@ parser.add_argument("--batch_size", default=8, type=int, help="Batch size per GPU/CPU for training/evaluation.") parser.add_argument("--num_gpus", default=1, type=int, help="Number of GPUs") parser.add_argument( - "--amp_opt_level", default="O0", type=str, choices=["O0", "O1", "O2"], help="01/02 to enable mixed precision" + "--amp_opt_level", default="O1", type=str, choices=["O0", "O1", "O2"], help="01/02 to enable mixed precision" ) parser.add_argument("--local_rank", type=int, default=None, help="For distributed training: local_rank") parser.add_argument( "--work_dir", - default='output_glue', + default="output_glue", type=str, help="The output directory where the model predictions and checkpoints will be written.", ) +parser.add_argument( + "--checkpoint_dir", + default=None, + type=str, + help="The folder containing the checkpoints for the model to continue training", +) +parser.add_argument("--no_time_to_log_dir", action="store_true", help="whether to add time to work_dir or not") parser.add_argument( "--save_epoch_freq", default=1, @@ -150,7 +172,17 @@ type=int, help="Frequency of saving checkpoint '-1' - step checkpoint won't be saved", ) -parser.add_argument("--loss_step_freq", default=25, type=int, help="Frequency of printing loss") +parser.add_argument("--loss_step_freq", default=-1, type=int, help="Frequency of printing loss") +parser.add_argument( + "--no_data_cache", action="store_true", help="When specified do not load and store cache preprocessed data.", +) +parser.add_argument("--no_shuffle_data", action="store_false", dest="shuffle_data") +parser.add_argument( + "--wandb_project", default=None, type=str, help='Project name for tracking with Weights and Biases' +) +parser.add_argument( + "--wandb_experiment", default=None, type=str, help='Experiment name for tracking with Weights and Biases' +) args = parser.parse_args() @@ -160,13 +192,11 @@ "obtained at 
https://gist.github.com/W4ngatang/60c2bdb54d156a41194446737ce03e2e" ) -args.work_dir = f'{args.work_dir}/{args.task_name.upper()}' - """ Prepare GLUE task MNLI task has two separate dev sets: matched and mismatched """ -if args.task_name == 'mnli': +if args.task_name == "mnli": eval_task_names = ("mnli", "mnli-mm") task_processors = (processors["mnli"](), processors["mnli-mm"]()) else: @@ -184,64 +214,37 @@ optimization_level=args.amp_opt_level, log_dir=args.work_dir, create_tb_writer=True, + checkpoint_dir=args.checkpoint_dir, files_to_copy=[__file__], - add_time_to_log_dir=True, + add_time_to_log_dir=not args.no_time_to_log_dir, ) +logging.info(f"{args}") -if args.bert_checkpoint is None: - """ Use this if you're using a standard BERT model. - To see the list of pretrained models, call: - nemo_nlp.nm.trainables.huggingface.BERT.list_pretrained_models() - """ - tokenizer = NemoBertTokenizer(args.pretrained_bert_model) - model = nemo_nlp.nm.trainables.huggingface.BERT(pretrained_model_name=args.pretrained_bert_model) -else: - """ Use this if you're using a BERT model that you pre-trained yourself. - Replace BERT-STEP-150000.pt with the path to your checkpoint. - """ - if args.tokenizer == "sentencepiece": - special_tokens = nemo_nlp.utils.MODEL_SPECIAL_TOKENS['bert'] - tokenizer = SentencePieceTokenizer(model_path=args.tokenizer_model, special_tokens=special_tokens) - elif args.tokenizer == "nemobert": - tokenizer = NemoBertTokenizer(args.pretrained_bert_model) - else: - raise ValueError(f"received unexpected tokenizer '{args.tokenizer}'") - - if args.bert_config is not None: - config = BertConfig.from_json_file(args.bert_config).to_dict() - args.vocab_size = config['vocab_size'] - args.hidden_size = config['hidden_size'] - args.num_hidden_layers = config['num_hidden_layers'] - args.num_attention_heads = config['num_attention_heads'] - args.intermediate_size = config['intermediate_size'] - args.hidden_act = config['hidden_act'] - args.max_seq_length = config['max_position_embeddings'] - - model = nemo_nlp.nm.trainables.huggingface.BERT( - vocab_size=args.vocab_size, - num_hidden_layers=args.num_hidden_layers, - hidden_size=args.hidden_size, - num_attention_heads=args.num_attention_heads, - intermediate_size=args.intermediate_size, - max_position_embeddings=args.max_seq_length, - hidden_act=args.hidden_act, - ) - logging.info(f"using {args.bert_config}") - else: - model = nemo_nlp.nm.trainables.huggingface.BERT(pretrained_model_name=args.pretrained_bert_model) - model.restore_from(args.bert_checkpoint) - logging.info(f"model resotred from {args.bert_checkpoint}") +model = nemo_nlp.nm.trainables.get_pretrained_lm_model( + pretrained_model_name=args.pretrained_model_name, + config=args.bert_config, + vocab=args.vocab_file, + checkpoint=args.bert_checkpoint, +) + +tokenizer = nemo.collections.nlp.data.tokenizers.get_tokenizer( + tokenizer_name=args.tokenizer, + pretrained_model_name=args.pretrained_model_name, + tokenizer_model=args.tokenizer_model, + vocab_file=args.vocab_file, + do_lower_case=args.do_lower_case, +) hidden_size = model.hidden_size # uses [CLS] token for classification (the first token) -if args.task_name == 'sts-b': +if args.task_name == "sts-b": pooler = SequenceRegression(hidden_size=hidden_size) glue_loss = MSELoss() else: pooler = SequenceClassifier(hidden_size=hidden_size, num_classes=num_labels, log_softmax=False) - glue_loss = CrossEntropyLoss() + glue_loss = CrossEntropyLossNM() def create_pipeline( @@ -253,19 +256,18 @@ def create_pipeline( 
processor=task_processors[0], ): data_layer = GlueClassificationDataLayer - if output_mode == 'regression': + if output_mode == "regression": data_layer = GlueRegressionDataLayer data_layer = data_layer( processor=processor, evaluate=evaluate, batch_size=batch_size, - # num_workers=0, - # local_rank=local_rank, tokenizer=tokenizer, data_dir=args.data_dir, max_seq_length=max_seq_length, - token_params=token_params, + use_data_cache=not args.no_data_cache, + shuffle=False if evaluate else args.shuffle_data, ) input_ids, input_type_ids, input_mask, labels = data_layer() @@ -279,7 +281,7 @@ def create_pipeline( represents logits. """ pooler_output = pooler(hidden_states=hidden_states) - if args.task_name == 'sts-b': + if args.task_name == "sts-b": loss = glue_loss(preds=pooler_output, labels=labels) else: loss = glue_loss(logits=pooler_output, labels=labels) @@ -288,7 +290,7 @@ def create_pipeline( return loss, steps_per_epoch, data_layer, [pooler_output, labels] -token_params = {'bos_token': None, 'eos_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]'} +token_params = {"bos_token": None, "eos_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]"} train_loss, steps_per_epoch, _, _ = create_pipeline() _, _, eval_data_layer, eval_tensors = create_pipeline(evaluate=True) @@ -300,6 +302,8 @@ def create_pipeline( user_epochs_done_callback=lambda x: eval_epochs_done_callback(x, args.work_dir, eval_task_names[0]), tb_writer=nf.tb_writer, eval_step=steps_per_epoch, + wandb_name=args.wandb_experiment, + wandb_project=args.wandb_project, ) ] @@ -307,7 +311,7 @@ def create_pipeline( MNLI task has two dev sets: matched and mismatched Create additional callback and data layer for MNLI mismatched dev set """ -if args.task_name == 'mnli': +if args.task_name == "mnli": _, _, eval_data_layer_mm, eval_tensors_mm = create_pipeline(evaluate=True, processor=task_processors[1]) callbacks_eval.append( nemo_core.EvaluatorCallback( @@ -322,9 +326,9 @@ def create_pipeline( logging.info(f"steps_per_epoch = {steps_per_epoch}") callback_train = nemo_core.SimpleLossLoggerCallback( tensors=[train_loss], - print_func=lambda x: print("Loss: {:.3f}".format(x[0].item())), + print_func=lambda x: logging.info("Loss: {:.3f}".format(x[0].item())), get_tb_values=lambda x: [["loss", x[0]]], - step_freq=args.loss_step_freq, + step_freq=args.loss_step_freq if args.loss_step_freq > 0 else steps_per_epoch, tb_writer=nf.tb_writer, ) @@ -332,14 +336,27 @@ def create_pipeline( folder=nf.checkpoint_dir, epoch_freq=args.save_epoch_freq, step_freq=args.save_step_freq ) +callbacks = [callback_train, ckpt_callback] + callbacks_eval + +if args.wandb_project and args.wandb_experiment: + wand_callback = nemo.core.WandbCallback( + train_tensors=[train_loss], + wandb_name=args.wandb_experiment, + wandb_project=args.wandb_project, + update_freq=args.loss_step_freq if args.loss_step_freq > 0 else steps_per_epoch, + args=args, + ) + callbacks.append(wand_callback) + lr_policy_fn = get_lr_policy( args.lr_policy, total_steps=args.num_epochs * steps_per_epoch, warmup_ratio=args.lr_warmup_proportion ) + nf.train( tensors_to_optimize=[train_loss], - callbacks=[callback_train, ckpt_callback] + callbacks_eval, + callbacks=callbacks, lr_policy=lr_policy_fn, optimizer=args.optimizer_kind, - optimization_params={"num_epochs": args.num_epochs, "lr": args.lr}, + optimization_params={"num_epochs": args.num_epochs, "lr": args.lr, "weight_decay": args.weight_decay}, ) diff --git a/examples/nlp/intent_detection_slot_tagging/data/assistant_utils.py 
b/examples/nlp/intent_detection_slot_tagging/data/assistant_utils.py new file mode 100644 index 000000000000..fb7446991437 --- /dev/null +++ b/examples/nlp/intent_detection_slot_tagging/data/assistant_utils.py @@ -0,0 +1,157 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +import os +import re +import shutil + +from nemo import logging +from nemo.collections.nlp.data.datasets.datasets_utils.data_preprocessing import ( + DATABASE_EXISTS_TMP, + if_exist, + write_files, +) + + +def copy_input_files(infold): + """ Put training files in convenient place for conversion to our format. """ + our_infold = infold + "/dataset" + + if os.path.exists(our_infold + "/trainset") and os.path.exists(our_infold + "/testset"): + logging.info("Input folders exists") + return + + logging.info(f"Copying files to input folder: {our_infold}") + os.makedirs(infold, exist_ok=True) + + old_infold = ( + infold + '/CrossValidation/autoGeneFromRealAnno/autoGene_2018_03_22-13_01_25_169/CrossValidation/KFold_1' + ) + if not os.path.exists(our_infold + "/trainset"): + shutil.copytree(old_infold + '/trainset', our_infold + '/trainset') + + if not os.path.exists(our_infold + "/testset"): + shutil.copytree(old_infold + '/testset/csv', our_infold + '/testset') + + +def get_intents(infold): + """ Get list of intents from file names. """ + intents = [f[:-4] for f in os.listdir(infold)] + intents.sort() + print(f'Found {len(intents)} intents') + return intents + + +def get_intent_queries(infold, intent_names, mode): + """ Get list of queries with their corresponding intent number. """ + intent_queries = ['sentence\tlabel\n'] + + for index, intent in enumerate(intent_names): + queries = open(f'{infold}/{mode}set/{intent}.csv', 'r').readlines() + for query in queries[1:]: + phrases = query.split(";") + intent_query = phrases[4][1:-1] + "\t" + str(index) + intent_queries.append(intent_query) + + return intent_queries + + +def get_slots(infold, modes): + """ + Find a lost of unique slot types in training and testing data. + We use a single slot type name both for starting and continuation tokes (not using B-, I- notation). + """ + slots = set() + + for mode in modes: + path = f'{infold}/{mode}set' + for filename in os.listdir(path): + lines = open(f'{path}/{filename}', 'r').readlines() + for line in lines[1:]: + query = line.split(";")[3] + slot_phrases = re.findall('\[.*?\]', query) + for slot_phrase in slot_phrases: + slot = slot_phrase.split(" : ")[0][1:] + slots.add(slot) + + slots = sorted(slots) + slots.append("O") + print(f'Found {len(slots)} slot types') + return slots + + +def get_slot_queries(infold, slot_dict, mode, intent_names): + """ Convert each word in a query to corresponding slot number. 
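+    Each word is mapped to the numeric id from slot_dict; words outside any
+    "[slot : value]" annotation get the id of the last entry ("O"). As an
+    illustrative, made-up example, the annotated query
+    "wake me up at [time : five am]" would be emitted as "O O O O time time",
+    written as the corresponding ids.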
""" + slot_queries = [] + outside_slot = len(slot_dict) - 1 + + # keep the same order of files/queries as for intents + for intent in intent_names: + lines = open(f'{infold}/{mode}set/{intent}.csv', 'r').readlines() + for line in lines[1:]: + slot_query = "" + query = line.split(";")[3] + words = query.split(" ") + current_slot = outside_slot + for word in words: + if word[0] == "[": + current_slot = slot_dict[word[1:]] + elif word[0] == ":": + continue + else: + slot_query += str(current_slot) + " " + if word[-1] == ']': + current_slot = outside_slot + + slot_queries.append(slot_query.strip()) + + return slot_queries + + +def process_assistant(infold, outfold, modes=['train', 'test']): + """ + https://github.com/xliuhw/NLU-Evaluation-Data - this dataset includes + about 25 thousand examples with 66 various multi-domain intents and 57 entity types. + """ + if if_exist(outfold, [f'{mode}_slots.tsv' for mode in modes]): + logging.info(DATABASE_EXISTS_TMP.format('robot', outfold)) + return outfold + + logging.info(f'Processing assistant commands dataset and store at {outfold}') + os.makedirs(outfold, exist_ok=True) + + # copy train/test files to the convenient directory to work with + copy_input_files(infold) + infold += "/dataset" + + # get list of intents from train folder (test folder supposed to be the same) + intent_names = get_intents(infold + "/trainset") + write_files(intent_names, f'{outfold}/dict.intents.csv') + + # get all train and test queries with their intent + for mode in modes: + intent_queries = get_intent_queries(infold, intent_names, mode) + write_files(intent_queries, f'{outfold}/{mode}.tsv') + + # get list of all unique slots in training and testing files + slot_types = get_slots(infold, modes) + write_files(slot_types, f'{outfold}/dict.slots.csv') + + # create files of slot queries + slot_dict = {k: v for v, k in enumerate(slot_types)} + for mode in modes: + slot_queries = get_slot_queries(infold, slot_dict, mode, intent_names) + write_files(slot_queries, f'{outfold}/{mode}_slots.tsv') diff --git a/examples/nlp/intent_detection_slot_tagging/data/dialogflow_utils.py b/examples/nlp/intent_detection_slot_tagging/data/dialogflow_utils.py new file mode 100644 index 000000000000..77278ca3b81f --- /dev/null +++ b/examples/nlp/intent_detection_slot_tagging/data/dialogflow_utils.py @@ -0,0 +1,109 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================= + +import json +import os + +from nemo import logging +from nemo.collections.nlp.data.datasets.datasets_utils import partition_data, write_files +from nemo.collections.nlp.data.datasets.datasets_utils.data_preprocessing import DATABASE_EXISTS_TMP, if_exist + +__all__ = [ + 'get_intent_query_files_dialogflow', + 'get_intents_slots_dialogflow', + 'get_slots_dialogflow', + 'process_dialogflow', +] + + +def get_intent_query_files_dialogflow(path): + fileslist = [] + for root, _, files in os.walk(path): + for file in files: + if '_usersays_en.json' in file: + fileslist.append(os.path.join(root, file)) + return fileslist + + +def get_intents_slots_dialogflow(files, slot_labels): + intent_names = [] + intent_queries = [] + slot_tags = [] + + for index, file in enumerate(files): + intent_names.append(os.path.basename(file).split('_usersays')[0]) + + with open(file) as json_file: + intent_data = json.load(json_file) + for query in intent_data: + query_text = "" + slots = "" + for segment in query['data']: + query_text = ''.join([query_text, segment['text']]) + if 'alias' in segment: + for _ in segment['text'].split(): + slots = ' '.join([slots, slot_labels.get(segment['alias'])]) + else: + for _ in segment['text'].split(): + slots = ' '.join([slots, slot_labels.get('O')]) + query_text = f'{query_text.strip()}\t{index}\n' + intent_queries.append(query_text) + slots = f'{slots.strip()}\n' + slot_tags.append(slots) + return intent_queries, intent_names, slot_tags + + +def get_slots_dialogflow(files): + slot_labels = {} + count = 0 + for file in files: + intent_head_file = ''.join([file.split('_usersays')[0], '.json']) + with open(intent_head_file) as json_file: + intent_meta_data = json.load(json_file) + for params in intent_meta_data['responses'][0]['parameters']: + if params['name'] not in slot_labels: + slot_labels[params['name']] = str(count) + count += 1 + slot_labels['O'] = str(count) + return slot_labels + + +def process_dialogflow(infold, outfold, dev_split=0.1): + if not os.path.exists(infold): + link = 'www.dialogflow.com' + raise ValueError( + f'Data not found at {infold}. ' f'Export your dialogflow data from' f'{link} and unzip at {infold}.' + ) + + if if_exist(outfold, [f'{mode}.tsv' for mode in ['train', 'test']]): + logging.info(DATABASE_EXISTS_TMP.format('mturk', outfold)) + return + + os.makedirs(outfold, exist_ok=True) + + files = get_intent_query_files_dialogflow(infold) + slot_labels = get_slots_dialogflow(files) + intent_queries, intent_names, slot_tags = get_intents_slots_dialogflow(files, slot_labels) + train_queries, train_slots, test_queries, test_slots = partition_data(intent_queries, slot_tags, split=dev_split) + + write_files(train_queries, f'{outfold}/train.tsv') + write_files(train_slots, f'{outfold}/train_slots.tsv') + + write_files(test_queries, f'{outfold}/test.tsv') + write_files(test_slots, f'{outfold}/test_slots.tsv') + + write_files(slot_labels, f'{outfold}/dict.slots.csv') + write_files(intent_names, f'{outfold}/dict.intents.csv') diff --git a/examples/nlp/intent_detection_slot_tagging/data/import_datasets.py b/examples/nlp/intent_detection_slot_tagging/data/import_datasets.py new file mode 100755 index 000000000000..7661f426f8ae --- /dev/null +++ b/examples/nlp/intent_detection_slot_tagging/data/import_datasets.py @@ -0,0 +1,281 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +import argparse +import os +import shutil +from os.path import exists + +from assistant_utils import process_assistant +from dialogflow_utils import process_dialogflow +from mturk_utils import process_mturk + +from nemo import logging +from nemo.collections.nlp.data.datasets.datasets_utils import ( + DATABASE_EXISTS_TMP, + MODE_EXISTS_TMP, + create_dataset, + get_dataset, + if_exist, +) +from nemo.collections.nlp.utils import get_vocab + + +def ids2text(ids, vocab): + return ' '.join([vocab[int(id_)] for id_ in ids]) + + +def process_atis(infold, outfold, modes=['train', 'test'], do_lower_case=False): + """ MSFT's dataset, processed by Kaggle + https://www.kaggle.com/siddhadev/atis-dataset-from-ms-cntk + """ + vocab = get_vocab(f'{infold}/atis.dict.vocab.csv') + + if if_exist(outfold, [f'{mode}.tsv' for mode in modes]): + logging.info(DATABASE_EXISTS_TMP.format('ATIS', outfold)) + return outfold + logging.info(f'Processing ATIS dataset and storing at {outfold}.') + + os.makedirs(outfold, exist_ok=True) + + outfiles = {} + for mode in modes: + outfiles[mode] = open(os.path.join(outfold, mode + '.tsv'), 'w') + outfiles[mode].write('sentence\tlabel\n') + outfiles[mode + '_slots'] = open(f'{outfold}/{mode}_slots.tsv', 'w') + + queries = open(f'{infold}/atis.{mode}.query.csv', 'r').readlines() + intents = open(f'{infold}/atis.{mode}.intent.csv', 'r').readlines() + slots = open(f'{infold}/atis.{mode}.slots.csv', 'r').readlines() + + for i, query in enumerate(queries): + sentence = ids2text(query.strip().split()[1:-1], vocab) + if do_lower_case: + sentence = sentence.lower() + outfiles[mode].write(f'{sentence}\t{intents[i].strip()}\n') + slot = ' '.join(slots[i].strip().split()[1:-1]) + outfiles[mode + '_slots'].write(slot + '\n') + + shutil.copyfile(f'{infold}/atis.dict.intent.csv', f'{outfold}/dict.intents.csv') + shutil.copyfile(f'{infold}/atis.dict.slots.csv', f'{outfold}/dict.slots.csv') + for mode in modes: + outfiles[mode].close() + + +def process_snips(infold, outfold, do_lower_case, modes=['train', 'test'], dev_split=0.1): + if not os.path.exists(infold): + link = 'https://github.com/snipsco/spoken-language-understanding-research-datasets' + raise ValueError(f'Data not found at {infold}. ' f'You may request to download the SNIPS dataset from {link}.') + + exist = True + for dataset in ['light', 'speak', 'all']: + if if_exist(f'{outfold}/{dataset}', [f'{mode}.tsv' for mode in modes]): + logging.info(DATABASE_EXISTS_TMP.format('SNIPS-' + dataset, outfold)) + else: + exist = False + if exist: + return outfold + + logging.info(f'Processing SNIPS dataset and storing at folders "speak", "light" and "all" under {outfold}.') + logging.info( + f'Processing and importing "smart-speaker-en-close-field" -> "speak" and "smart-speaker-en-close-field" -> "light".' 
+ ) + + os.makedirs(outfold, exist_ok=True) + + speak_dir = 'smart-speaker-en-close-field' + light_dir = 'smart-lights-en-close-field' + + light_files = [f'{infold}/{light_dir}/dataset.json'] + speak_files = [f'{infold}/{speak_dir}/training_dataset.json'] + speak_files.append(f'{infold}/{speak_dir}/test_dataset.json') + + light_train, light_dev, light_slots, light_intents = get_dataset(light_files, dev_split) + speak_train, speak_dev, speak_slots, speak_intents = get_dataset(speak_files) + + create_dataset(light_train, light_dev, light_slots, light_intents, do_lower_case, f'{outfold}/light') + create_dataset(speak_train, speak_dev, speak_slots, speak_intents, do_lower_case, f'{outfold}/speak') + create_dataset( + light_train + speak_train, + light_dev + speak_dev, + light_slots | speak_slots, + light_intents | speak_intents, + do_lower_case, + f'{outfold}/all', + ) + + +def process_jarvis_datasets( + infold, outfold, modes=['train', 'test', 'dev'], do_lower_case=False, ignore_prev_intent=False +): + """ process and convert Jarvis datasets into NeMo's BIO format + """ + dataset_name = "jarvis" + if if_exist(outfold, ['dict.intents.csv', 'dict.slots.csv']): + logging.info(DATABASE_EXISTS_TMP.format(dataset_name, outfold)) + return outfold + + logging.info(f'Processing {dataset_name} dataset and storing at {outfold}') + + os.makedirs(outfold, exist_ok=True) + + outfiles = {} + intents_list = {} + slots_list = {} + slots_list_all = {} + + outfiles['dict_intents'] = open(f'{outfold}/dict.intents.csv', 'w') + outfiles['dict_slots'] = open(f'{outfold}/dict.slots.csv', 'w') + + outfiles['dict_slots'].write('O\n') + slots_list["O"] = 0 + slots_list_all["O"] = 0 + + for mode in modes: + if if_exist(outfold, [f'{mode}.tsv']): + logging.info(MODE_EXISTS_TMP.format(mode, dataset_name, outfold, mode)) + continue + + if not if_exist(infold, [f'{mode}.tsv']): + logging.info(f'{mode} mode of {dataset_name}' f' is skipped as it was not found.') + continue + + outfiles[mode] = open(os.path.join(outfold, mode + '.tsv'), 'w') + outfiles[mode].write('sentence\tlabel\n') + outfiles[mode + '_slots'] = open(f'{outfold}/{mode}_slots.tsv', 'w') + + queries = open(f'{infold}/{mode}.tsv', 'r').readlines() + + for i, query in enumerate(queries): + line_splits = query.strip().split("\t") + if len(line_splits) == 3: + intent_str, slot_tags_str, sentence = line_splits + else: + intent_str, sentence = line_splits + slot_tags_str = "" + + if intent_str not in intents_list: + intents_list[intent_str] = len(intents_list) + outfiles['dict_intents'].write(f'{intent_str}\n') + + if ignore_prev_intent: + start_token = 2 + else: + start_token = 1 + + if do_lower_case: + sentence = sentence.lower() + sentence_cld = " ".join(sentence.strip().split()[start_token:-1]) + outfiles[mode].write(f'{sentence_cld}\t' f'{str(intents_list[intent_str])}\n') + + slot_tags_list = [] + if slot_tags_str.strip(): + slot_tags = slot_tags_str.strip().split(",") + for st in slot_tags: + if not st.strip(): + continue + [start_i, end_i, slot_name] = st.strip().split(":") + slot_tags_list.append([int(start_i), int(end_i), slot_name]) + if slot_name not in slots_list: + slots_list[slot_name] = len(slots_list) + slots_list_all[f'B-{slot_name}'] = len(slots_list_all) + slots_list_all[f'I-{slot_name}'] = len(slots_list_all) + outfiles['dict_slots'].write(f'B-{slot_name}\n') + outfiles['dict_slots'].write(f'I-{slot_name}\n') + + slot_tags_list.sort(key=lambda x: x[0]) + slots = [] + processed_index = 0 + for tag_start, tag_end, tag_str in slot_tags_list: + if 
tag_start > processed_index: + words_list = sentence[processed_index:tag_start].strip().split() + slots.extend([str(slots_list_all['O'])] * len(words_list)) + words_list = sentence[tag_start:tag_end].strip().split() + slots.append(str(slots_list_all[f'B-{tag_str}'])) + slots.extend([str(slots_list_all[f'I-{tag_str}'])] * (len(words_list) - 1)) + processed_index = tag_end + + if processed_index < len(sentence): + words_list = sentence[processed_index:].strip().split() + slots.extend([str(slots_list_all['O'])] * len(words_list)) + + slots = slots[1:-1] + slot = ' '.join(slots) + outfiles[mode + '_slots'].write(slot + '\n') + + outfiles[mode + '_slots'].close() + outfiles[mode].close() + + outfiles['dict_slots'].close() + outfiles['dict_intents'].close() + + return outfold + + +if __name__ == "__main__": + # Parse the command-line arguments. + parser = argparse.ArgumentParser(description="Process and convert datasets into NeMo\'s format.") + parser.add_argument( + "--dataset_name", + required=True, + type=str, + choices=['atis', 'snips', 'jarvis', 'dialogflow', 'mturk-processed', 'assistant'], + ) + parser.add_argument( + "--source_data_dir", required=True, type=str, help='path to the folder containing the dataset files' + ) + parser.add_argument("--target_data_dir", required=True, type=str, help='path to save the processed dataset') + parser.add_argument("--do_lower_case", action='store_true') + parser.add_argument( + "--ignore_prev_intent", + action='store_true', + help='ignores previous intent while importing datasets in jarvis\'s format', + ) + parser.add_argument( + "--use_full_dataset", + action='store_true', + help='using full 25K dataset versus smaller 10K version for assistant\'s dataset', + ) + + args = parser.parse_args() + + dataset_name = args.dataset_name + source_dir = args.source_data_dir + target_dir = args.target_data_dir + + if not exists(source_dir): + raise FileNotFoundError(f"{source_dir} does not exist.") + + if dataset_name == 'atis': + process_atis(infold=source_dir, outfold=target_dir, do_lower_case=args.do_lower_case) + elif dataset_name == 'snips': + process_snips(infold=source_dir, outfold=target_dir, do_lower_case=args.do_lower_case) + elif dataset_name == 'jarvis': + process_jarvis_datasets( + infold=source_dir, + outfold=target_dir, + modes=["train", "test", "dev"], + do_lower_case=args.do_lower_case, + ignore_prev_intent=args.ignore_prev_intent, + ) + elif dataset_name == 'dialogflow': + process_dialogflow(infold=source_dir, outfold=target_dir) + elif dataset_name == 'mturk-processed': + process_mturk(infold=source_dir, outfold=target_dir) + elif dataset_name == 'assistant': + process_assistant(infold=source_dir, outfold=target_dir, use_full_dataset=args.use_full_dataset) + else: + raise ValueError(f'Dataset {dataset_name} is not supported.') diff --git a/examples/nlp/intent_detection_slot_tagging/data/mturk_utils.py b/examples/nlp/intent_detection_slot_tagging/data/mturk_utils.py new file mode 100644 index 000000000000..4035ec4e235d --- /dev/null +++ b/examples/nlp/intent_detection_slot_tagging/data/mturk_utils.py @@ -0,0 +1,197 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +import json +import os + +from nemo import logging +from nemo.collections.nlp.data.datasets.datasets_utils.data_preprocessing import ( + DATABASE_EXISTS_TMP, + if_exist, + partition_data, + read_csv, + write_files, +) + +__all__ = ['process_mturk', 'process_intent_slot_mturk', 'get_intents_mturk', 'get_slot_labels'] + + +def process_mturk(infold, outfold, modes=['train', 'test']): + if not os.path.exists(infold): + link = 'www.mturk.com' + raise ValueError( + f'Data not found at {infold}. ' f'Export your mturk data from' f'{link} and unzip at {infold}.' + ) + + if if_exist(outfold, [f'{mode}.tsv' for mode in modes]): + logging.info(DATABASE_EXISTS_TMP.format('mturk', outfold)) + return + + logging.info(f'Processing dataset from mturk and storing at {outfold}') + + os.makedirs(outfold, exist_ok=True) + + classification_data_file = f'{infold}/classification.csv' + annotation_data_file = f'{infold}/annotation.manifest' + + if not os.path.exists(classification_data_file): + raise FileNotFoundError(f'File not found ' f'at {classification_data_file}') + + if not os.path.exists(annotation_data_file): + raise FileNotFoundError(f'File not found at {annotation_data_file}') + + utterances = [] + utterances = read_csv(classification_data_file) + + # This function assumes that the intent classification data has been + # reviewed and cleaned and only one label per utterance is present. 
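+    # get_intents_mturk (defined below) returns agreed_all, a mapping from each
+    # utterance to its intent label, and intent_names, a mapping from intent
+    # label to a numeric index (re-using an existing dict.intents.csv if present).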
+ agreed_all, intent_names = get_intents_mturk(utterances, outfold) + + with open(annotation_data_file, 'r') as f: + slot_annotations = f.readlines() + + # This function assumes that the preprocess step would have made + # the task_name of all the annotations generic + task_name = 'retail-combined' + + # It is assumed that every utterances will have corresponding + # slot annotation information + if len(slot_annotations) < len(agreed_all): + raise ValueError(f'Every utterance must have corresponding' f'slot annotation information') + + slot_labels, intent_queries, slot_tags = process_intent_slot_mturk( + slot_annotations, agreed_all, intent_names, task_name + ) + + assert len(slot_tags) == len(intent_queries) + + dev_split = 0.1 + + train_queries, train_slots, test_queries, test_slots = partition_data(intent_queries, slot_tags, split=dev_split) + + write_files(train_queries, f'{outfold}/train.tsv') + write_files(train_slots, f'{outfold}/train_slots.tsv') + + write_files(test_queries, f'{outfold}/test.tsv') + write_files(test_slots, f'{outfold}/test_slots.tsv') + + write_files(slot_labels, f'{outfold}/dict.slots.csv') + write_files(intent_names, f'{outfold}/dict.intents.csv') + + +def process_intent_slot_mturk(slot_annotations, agreed_all, intent_names, task_name): + slot_tags = [] + inorder_utterances = [] + all_labels = get_slot_labels(slot_annotations, task_name) + logging.info(f'agreed_all - {len(agreed_all)}') + logging.info(f'Slot annotations - {len(slot_annotations)}') + + for annotation in slot_annotations[0:]: + an = json.loads(annotation) + utterance = an['source'] + if len(utterance) > 2 and utterance.startswith('"') and utterance.endswith('"'): + utterance = utterance[1:-1] + + if utterance in agreed_all: + entities = {} + annotated_entities = an[task_name]['annotations']['entities'] + for i, each_anno in enumerate(annotated_entities): + entities[int(each_anno['startOffset'])] = i + + lastptr = 0 + slotlist = [] + # sorting annotations by the start offset + for i in sorted(entities.keys()): + annotated_entities = an[task_name]['annotations']['entities'] + tags = annotated_entities[entities.get(i)] + untagged_words = utterance[lastptr : tags['startOffset']] + for _ in untagged_words.split(): + slotlist.append(all_labels.get('O')) + anno_words = utterance[tags['startOffset'] : tags['endOffset']] + # tagging with the IOB format. 
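+                # (Illustrative example: an annotated span "new york" with label
+                # "city" is emitted as "B-city I-city"; the first word of a span
+                # gets the B- tag, each following word the I- tag, and words
+                # outside annotated spans keep the "O" tag.)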
+ for j, _ in enumerate(anno_words.split()): + if j == 0: + b_slot = 'B-' + tags['label'] + slotlist.append(all_labels.get(b_slot)) + else: + i_slot = 'I-' + tags['label'] + slotlist.append(all_labels.get(i_slot)) + lastptr = tags['endOffset'] + + untagged_words = utterance[lastptr : len(utterance)] + for _ in untagged_words.split(): + slotlist.append(all_labels.get('O')) + + slotstr = ' '.join(slotlist) + slotstr = f'{slotstr.strip()}\n' + + slot_tags.append(slotstr) + intent_num = intent_names.get(agreed_all.get(utterance)) + query_text = f'{utterance.strip()}\t{intent_num}\n' + inorder_utterances.append(query_text) + # else: + # logging.warning(utterance) + + logging.info(f'inorder utterances - {len(inorder_utterances)}') + + return all_labels, inorder_utterances, slot_tags + + +def get_intents_mturk(utterances, outfold): + intent_names = {} + intent_count = 0 + + agreed_all = {} + + logging.info('Printing all intent_labels') + intent_dict = f'{outfold}/dict.intents.csv' + if os.path.exists(intent_dict): + with open(intent_dict, 'r') as f: + for intent_name in f.readlines(): + intent_names[intent_name.strip()] = intent_count + intent_count += 1 + logging.info(intent_names) + + for i, utterance in enumerate(utterances[1:]): + + if utterance[1] not in agreed_all: + agreed_all[utterance[0]] = utterance[1] + + if utterance[1] not in intent_names: + intent_names[utterance[1]] = intent_count + intent_count += 1 + + logging.info(f'Total number of utterance samples: {len(agreed_all)}') + + return agreed_all, intent_names + + +def get_slot_labels(slot_annotations, task_name): + slot_labels = json.loads(slot_annotations[0]) + + all_labels = {} + count = 0 + # Generating labels with the IOB format. + for label in slot_labels[task_name]['annotations']['labels']: + b_slot = 'B-' + label['label'] + i_slot = 'I-' + label['label'] + all_labels[b_slot] = str(count) + count += 1 + all_labels[i_slot] = str(count) + count += 1 + all_labels['O'] = str(count) + + return all_labels diff --git a/examples/nlp/intent_detection_slot_tagging/joint_intent_slot_infer.py b/examples/nlp/intent_detection_slot_tagging/joint_intent_slot_infer.py index 196a0e492055..c2d38b8e3c0f 100644 --- a/examples/nlp/intent_detection_slot_tagging/joint_intent_slot_infer.py +++ b/examples/nlp/intent_detection_slot_tagging/joint_intent_slot_infer.py @@ -18,50 +18,94 @@ import os import numpy as np -from sklearn.metrics import classification_report -from transformers import BertTokenizer +from sklearn.metrics import confusion_matrix -import nemo.collections.nlp.nm.trainables.joint_intent_slot.joint_intent_slot_nm +import nemo +import nemo.collections.nlp as nemo_nlp from nemo import logging from nemo.collections.nlp.data.datasets.joint_intent_slot_dataset import JointIntentSlotDataDesc +from nemo.collections.nlp.nm.data_layers import BertJointIntentSlotDataLayer +from nemo.collections.nlp.nm.trainables.joint_intent_slot import JointIntentSlotClassifier +from nemo.collections.nlp.utils.callback_utils import get_classification_report, get_f1_scores +from nemo.collections.nlp.utils.evaluation_utils import ( + analyze_confusion_matrix, + errors_per_class, + log_misclassified_queries, + log_misclassified_slots, +) + + +def concatenate(lists): + return np.concatenate([t.cpu() for t in lists]) + + +def get_preds(logits): + return np.argmax(logits, 1) + # Parsing arguments -parser = argparse.ArgumentParser(description='Joint-intent BERT') -parser.add_argument("--local_rank", default=None, type=int) -parser.add_argument("--batch_size", 
default=128, type=int) -parser.add_argument("--max_seq_length", default=50, type=int) -parser.add_argument("--pretrained_bert_model", default="bert-base-uncased", type=str) -parser.add_argument("--dataset_name", default='snips-all', type=str) -parser.add_argument("--data_dir", default='data/nlu/snips', type=str) -parser.add_argument("--work_dir", required=True, help="your checkpoint folder", type=str) +parser = argparse.ArgumentParser(description='Batch inference for intent detection/slot tagging with BERT') +parser.add_argument("--checkpoint_dir", required=True, help="your checkpoint folder", type=str) +parser.add_argument("--data_dir", default='data/atis', type=str) parser.add_argument("--eval_file_prefix", default='test', type=str) -parser.add_argument("--amp_opt_level", default="O0", type=str, choices=["O0", "O1", "O2"]) -parser.add_argument("--do_lower_case", action='store_false') +parser.add_argument( + "--pretrained_model_name", + default="bert-base-uncased", + type=str, + help="Name of the pre-trained model", + choices=nemo_nlp.nm.trainables.get_pretrained_lm_models_list(), +) +parser.add_argument("--bert_config", default=None, type=str, help="Path to bert config file in json format") +parser.add_argument( + "--tokenizer", + default="nemobert", + type=str, + choices=["nemobert", "sentencepiece"], + help="tokenizer to use, only relevant when using custom pretrained checkpoint.", +) +parser.add_argument( + "--tokenizer_model", + default=None, + type=str, + help="Path to pretrained tokenizer model, only used if --tokenizer is sentencepiece", +) +parser.add_argument("--vocab_file", default=None, help="Path to the vocab file.") +parser.add_argument( + "--do_lower_case", + action='store_true', + help="Whether to lower case the input text. True for uncased models, False for cased models. 
" + + "Only applicable when tokenizer is build with vocab file", +) +parser.add_argument("--batch_size", default=128, type=int) +parser.add_argument("--max_seq_length", default=64, type=int) +parser.add_argument("--local_rank", default=None, type=int) args = parser.parse_args() if not os.path.exists(args.data_dir): raise ValueError(f'Data not found at {args.data_dir}') -nf = nemo.core.NeuralModuleFactory( - backend=nemo.core.Backend.PyTorch, local_rank=args.local_rank, optimization_level=args.amp_opt_level, log_dir=None +nf = nemo.core.NeuralModuleFactory(backend=nemo.core.Backend.PyTorch, local_rank=args.local_rank) + +pretrained_bert_model = nemo_nlp.nm.trainables.get_pretrained_lm_model( + pretrained_model_name=args.pretrained_model_name, config=args.bert_config, vocab=args.vocab_file ) -""" Load the pretrained BERT parameters -See the list of pretrained models, call: -nemo_nlp.huggingface.BERT.list_pretrained_models() -""" -pretrained_bert_model = nemo.collections.nlp.nm.trainables.huggingface.BERT( - pretrained_model_name=args.pretrained_bert_model +tokenizer = nemo.collections.nlp.data.tokenizers.get_tokenizer( + tokenizer_name=args.tokenizer, + pretrained_model_name=args.pretrained_model_name, + tokenizer_model=args.tokenizer_model, + vocab_file=args.vocab_file, + do_lower_case=args.do_lower_case, ) + hidden_size = pretrained_bert_model.hidden_size -tokenizer = BertTokenizer.from_pretrained(args.pretrained_bert_model) -data_desc = JointIntentSlotDataDesc(args.data_dir, args.do_lower_case, args.dataset_name) +data_desc = JointIntentSlotDataDesc(data_dir=args.data_dir) # Evaluation pipeline logging.info("Loading eval data...") -data_layer = nemo.collections.nlp.nm.data_layers.joint_intent_slot_datalayer.BertJointIntentSlotDataLayer( +data_layer = BertJointIntentSlotDataLayer( input_file=f'{data_desc.data_dir}/{args.eval_file_prefix}.tsv', slot_file=f'{data_desc.data_dir}/{args.eval_file_prefix}_slots.tsv', pad_label=data_desc.pad_label, @@ -71,55 +115,97 @@ batch_size=args.batch_size, ) -classifier = nemo.collections.nlp.nm.trainables.joint_intent_slot.joint_intent_slot_nm.JointIntentSlotClassifier( +classifier = JointIntentSlotClassifier( hidden_size=hidden_size, num_intents=data_desc.num_intents, num_slots=data_desc.num_slots ) -(ids, type_ids, input_mask, loss_mask, subtokens_mask, intents, slots) = data_layer() +input_data = data_layer() -hidden_states = pretrained_bert_model(input_ids=ids, token_type_ids=type_ids, attention_mask=input_mask) +hidden_states = pretrained_bert_model( + input_ids=input_data.input_ids, token_type_ids=input_data.input_type_ids, attention_mask=input_data.input_mask +) intent_logits, slot_logits = classifier(hidden_states=hidden_states) ########################################################################### - - # Instantiate an optimizer to perform `infer` action evaluated_tensors = nf.infer( - tensors=[intent_logits, slot_logits, loss_mask, subtokens_mask, intents, slots], checkpoint_dir=args.work_dir + tensors=[ + intent_logits, + slot_logits, + input_data.loss_mask, + input_data.subtokens_mask, + input_data.intents, + input_data.slots, + ], + checkpoint_dir=args.checkpoint_dir, ) - -def concatenate(lists): - return np.concatenate([t.cpu() for t in lists]) - - -def get_preds(logits): - return np.argmax(logits, 1) - - -intent_logits, slot_logits, loss_mask, subtokens_mask, intents, slot_labels = [ +# --- analyse of the results --- +intent_logits, slot_logits, loss_mask, subtokens_mask, intent_labels, slot_labels_unmasked = [ concatenate(tensors) for 
tensors in evaluated_tensors ] -pred_intents = np.argmax(intent_logits, 1) -logging.info('Intent prediction results') - -intents = np.asarray(intents) -pred_intents = np.asarray(pred_intents) -intent_accuracy = sum(intents == pred_intents) / len(pred_intents) -logging.info(f'Intent accuracy: {intent_accuracy}') -logging.info(classification_report(intents, pred_intents)) - -slot_preds = np.argmax(slot_logits, axis=2) -slot_preds_list, slot_labels_list = [], [] +# slot accuracies +logging.info('Slot Prediction Results:') +slot_preds_unmasked = np.argmax(slot_logits, axis=2) subtokens_mask = subtokens_mask > 0.5 -for i, sp in enumerate(slot_preds): - slot_preds_list.extend(list(slot_preds[i][subtokens_mask[i]])) - slot_labels_list.extend(list(slot_labels[i][subtokens_mask[i]])) - -logging.info('Slot prediction results') -slot_labels_list = np.asarray(slot_labels_list) -slot_preds_list = np.asarray(slot_preds_list) -slot_accuracy = sum(slot_labels_list == slot_preds_list) / len(slot_labels_list) -logging.info(f'Slot accuracy: {slot_accuracy}') -logging.info(classification_report(slot_labels_list, slot_preds_list)) +slot_labels = slot_labels_unmasked[subtokens_mask] +slot_preds = slot_preds_unmasked[subtokens_mask] +slot_accuracy = np.mean(slot_labels == slot_preds) +logging.info(f'Slot Accuracy: {slot_accuracy}') +f1_scores = get_f1_scores(slot_labels, slot_preds, average_modes=['weighted', 'macro', 'micro']) +for k, v in f1_scores.items(): + logging.info(f'{k}: {v}') + +logging.info(f'\n {get_classification_report(slot_labels, slot_preds, label_ids=data_desc.slots_label_ids)}') + +# intent accuracies +logging.info('Intent Prediction Results:') +intent_preds = np.asarray(np.argmax(intent_logits, 1)) +intent_labels = np.asarray(intent_labels) +intent_accuracy = np.mean(intent_labels == intent_preds) +logging.info(f'Intent Accuracy: {intent_accuracy}') +f1_scores = get_f1_scores(intent_labels, intent_preds, average_modes=['weighted', 'macro', 'micro']) +for k, v in f1_scores.items(): + logging.info(f'{k}: {v}') + +logging.info(f'\n {get_classification_report(intent_labels, intent_preds, label_ids=data_desc.intents_label_ids)}') + +# print queries with wrong intent: +queries = open(f'{data_desc.data_dir}/{args.eval_file_prefix}.tsv', 'r').readlines()[1:] +intent_dict = open(data_desc.intent_dict_file, 'r').read().splitlines() +log_misclassified_queries(intent_labels, intent_preds, queries, intent_dict, limit=30) + +# print queries with wrong slots: +slot_dict = open(data_desc.slot_dict_file, 'r').read().splitlines() +log_misclassified_slots( + intent_labels, + intent_preds, + slot_labels_unmasked, + slot_preds_unmasked, + subtokens_mask, + queries, + intent_dict, + slot_dict, + limit=30, +) + +# analyze confusion matrices +intent_max_pairs = 20 +logging.info('') +logging.info(f'*** Most Confused Intents (limit {intent_max_pairs}) ***') +cm = confusion_matrix(intent_labels, intent_preds) +analyze_confusion_matrix(cm, intent_dict, intent_max_pairs) + +logging.info('') +logging.info(f'\*** Intent errors per class (in both directions) ***') +errors_per_class(cm, intent_dict) + +slot_max_pairs = 20 +logging.info('') +logging.info(f'*** Most Confused Slots (limit {slot_max_pairs}) ***') +cm = confusion_matrix(slot_labels, slot_preds, np.arange(len(slot_dict))) +analyze_confusion_matrix(cm, slot_dict, slot_max_pairs) + +# check potentially problematic slots - when I- label comes after different B- label +# check_problematic_slots(slot_labels, slot_dict) diff --git 
a/examples/nlp/intent_detection_slot_tagging/joint_intent_slot_infer_b1.py b/examples/nlp/intent_detection_slot_tagging/joint_intent_slot_infer_b1.py index 84ab723c94a8..1b1ae2c423ae 100644 --- a/examples/nlp/intent_detection_slot_tagging/joint_intent_slot_infer_b1.py +++ b/examples/nlp/intent_detection_slot_tagging/joint_intent_slot_infer_b1.py @@ -17,64 +17,98 @@ import argparse import numpy as np -from transformers import BertTokenizer +import nemo import nemo.collections.nlp as nemo_nlp -import nemo.collections.nlp.nm.trainables.joint_intent_slot.joint_intent_slot_nm -from nemo.collections.nlp.data.datasets.joint_intent_slot_dataset import JointIntentSlotDataDesc -from nemo.collections.nlp.utils.common_nlp_utils import read_intent_slot_outputs +from nemo.collections.nlp.data.datasets.joint_intent_slot_dataset import ( + JointIntentSlotDataDesc, + read_intent_slot_outputs, +) +from nemo.collections.nlp.nm.data_layers import BertJointIntentSlotInferDataLayer +from nemo.collections.nlp.nm.trainables import JointIntentSlotClassifier # Parsing arguments -parser = argparse.ArgumentParser(description='Joint-intent BERT') -parser.add_argument("--max_seq_length", default=50, type=int) -parser.add_argument("--fc_dropout", default=0.1, type=float) -parser.add_argument("--pretrained_bert_model", default="bert-base-uncased", type=str) -parser.add_argument("--dataset_name", default='snips-all', type=str) -parser.add_argument("--data_dir", default='data/nlu/snips', type=str) -parser.add_argument("--query", default='please turn on the light', type=str) -parser.add_argument("--work_dir", required=True, help="your checkpoint folder", type=str) -parser.add_argument("--amp_opt_level", default="O0", type=str, choices=["O0", "O1", "O2"]) -parser.add_argument("--do_lower_case", action='store_false') +parser = argparse.ArgumentParser(description='Single query inference for intent detection/slot tagging with BERT') +parser.add_argument("--query", required=True, type=str) +parser.add_argument("--data_dir", default='data/atis', type=str) +parser.add_argument("--checkpoint_dir", required=True, help="path to your checkpoint folder", type=str) +parser.add_argument( + "--pretrained_model_name", + default="bert-base-uncased", + type=str, + help="Name of the pre-trained model", + choices=nemo_nlp.nm.trainables.get_pretrained_lm_models_list(), +) +parser.add_argument("--bert_config", default=None, type=str, help="Path to bert config file in json format") +parser.add_argument( + "--tokenizer", + default="nemobert", + type=str, + choices=["nemobert", "sentencepiece"], + help="tokenizer to use, only relevant when using custom pretrained checkpoint.", +) +parser.add_argument( + "--tokenizer_model", + default=None, + type=str, + help="Path to pretrained tokenizer model, only used if --tokenizer is sentencepiece", +) +parser.add_argument("--vocab_file", default=None, help="Path to the vocab file.") +parser.add_argument( + "--do_lower_case", + action='store_true', + help="Whether to lower case the input text. True for uncased models, False for cased models. 
" + + "Only applicable when tokenizer is build with vocab file", +) +parser.add_argument("--max_seq_length", default=64, type=int) args = parser.parse_args() -nf = nemo.core.NeuralModuleFactory( - backend=nemo.core.Backend.PyTorch, optimization_level=args.amp_opt_level, log_dir=None +nf = nemo.core.NeuralModuleFactory(backend=nemo.core.Backend.PyTorch) + +pretrained_bert_model = nemo_nlp.nm.trainables.get_pretrained_lm_model( + pretrained_model_name=args.pretrained_model_name, config=args.bert_config, vocab=args.vocab_file +) + +tokenizer = nemo.collections.nlp.data.tokenizers.get_tokenizer( + tokenizer_name=args.tokenizer, + pretrained_model_name=args.pretrained_model_name, + tokenizer_model=args.tokenizer_model, + vocab_file=args.vocab_file, + do_lower_case=args.do_lower_case, ) -""" Load the pretrained BERT parameters -See the list of pretrained models, call: -nemo_nlp.BERT.list_pretrained_models() -""" -pretrained_bert_model = nemo_nlp.nm.trainables.huggingface.BERT(pretrained_model_name=args.pretrained_bert_model) -tokenizer = BertTokenizer.from_pretrained(args.pretrained_bert_model) hidden_size = pretrained_bert_model.hidden_size -data_desc = JointIntentSlotDataDesc(args.data_dir, args.do_lower_case, args.dataset_name) +data_desc = JointIntentSlotDataDesc(data_dir=args.data_dir) query = args.query if args.do_lower_case: query = query.lower() -data_layer = nemo.collections.nlp.nm.data_layers.joint_intent_slot_datalayer.BertJointIntentSlotInferDataLayer( +data_layer = BertJointIntentSlotInferDataLayer( queries=[query], tokenizer=tokenizer, max_seq_length=args.max_seq_length, batch_size=1 ) # Create sentence classification loss on top -classifier = nemo.collections.nlp.nm.trainables.joint_intent_slot.joint_intent_slot_nm.JointIntentSlotClassifier( - hidden_size=hidden_size, num_intents=data_desc.num_intents, num_slots=data_desc.num_slots, dropout=args.fc_dropout +classifier = JointIntentSlotClassifier( + hidden_size=hidden_size, num_intents=data_desc.num_intents, num_slots=data_desc.num_slots, dropout=0.0 ) -ids, type_ids, input_mask, loss_mask, subtokens_mask = data_layer() +input_data = data_layer() -hidden_states = pretrained_bert_model(input_ids=ids, token_type_ids=type_ids, attention_mask=input_mask) +hidden_states = pretrained_bert_model( + input_ids=input_data.input_ids, token_type_ids=input_data.input_type_ids, attention_mask=input_data.input_mask +) intent_logits, slot_logits = classifier(hidden_states=hidden_states) ########################################################################### -evaluated_tensors = nf.infer(tensors=[intent_logits, slot_logits, subtokens_mask], checkpoint_dir=args.work_dir) +evaluated_tensors = nf.infer( + tensors=[intent_logits, slot_logits, input_data.subtokens_mask], checkpoint_dir=args.checkpoint_dir +) def concatenate(lists): diff --git a/examples/nlp/intent_detection_slot_tagging/joint_intent_slot_with_bert.py b/examples/nlp/intent_detection_slot_tagging/joint_intent_slot_with_bert.py index 0cbdb08f72cc..dbb6b350eb21 100644 --- a/examples/nlp/intent_detection_slot_tagging/joint_intent_slot_with_bert.py +++ b/examples/nlp/intent_detection_slot_tagging/joint_intent_slot_with_bert.py @@ -14,118 +14,158 @@ # limitations under the License. # ============================================================================= +""" +This example is based on a model proposed by Q. Chen et al in 'BERT for Joint Intent Classification and Slot Filling'. 
+https://arxiv.org/abs/1902.10909 + +This example shows how to train an intent detection and slot tagging model using BERT based models as input encoder. \ +It can be used as Natural Language Understanding (NLU) module for goal-oriented dialogue systems. +You may find more information on this example in https://nvidia.github.io/NeMo/nlp/joint_intent_slot_filling.html. +""" + import argparse import math import os -import numpy as np -from transformers import BertTokenizer - +import nemo import nemo.collections.nlp as nemo_nlp -import nemo.collections.nlp.nm.data_layers.joint_intent_slot_datalayer -import nemo.collections.nlp.nm.trainables.joint_intent_slot.joint_intent_slot_nm from nemo import logging +from nemo.backends.pytorch.common.losses import CrossEntropyLossNM, LossAggregatorNM from nemo.collections.nlp.callbacks.joint_intent_slot_callback import eval_epochs_done_callback, eval_iter_callback from nemo.collections.nlp.data.datasets.joint_intent_slot_dataset import JointIntentSlotDataDesc +from nemo.collections.nlp.nm.data_layers import BertJointIntentSlotDataLayer +from nemo.collections.nlp.nm.trainables import JointIntentSlotClassifier +from nemo.core import CheckpointCallback, SimpleLossLoggerCallback from nemo.utils.lr_policies import get_lr_policy # Parsing arguments -parser = argparse.ArgumentParser(description='Joint intent slot filling system with pretrained BERT') -parser.add_argument("--local_rank", default=None, type=int) +parser = argparse.ArgumentParser(description='Joint intent detection and slot filling with pre-trained BERT') +parser.add_argument("--data_dir", default='data/atis', type=str) +parser.add_argument("--work_dir", default='outputs', type=str) +parser.add_argument("--checkpoint_dir", default=None, type=str) +parser.add_argument( + '--pretrained_model_name', + default='bert-base-uncased', + type=str, + help='Name of the pre-trained model for the encoder', + choices=nemo_nlp.nm.trainables.get_pretrained_lm_models_list(), +) +parser.add_argument("--bert_checkpoint", default=None, type=str, help="Path to pretrained bert model") +parser.add_argument("--bert_config", default=None, type=str, help="Path to bert config file in json format") + +parser.add_argument("--vocab_file", default=None, help="Path to the vocab file.") +parser.add_argument( + "--tokenizer", + default="nemobert", + type=str, + choices=["nemobert", "sentencepiece"], + help="tokenizer to use, only relevant when using custom pretrained checkpoint.", +) +parser.add_argument( + "--tokenizer_model", + default=None, + type=str, + help="Path to pretrained tokenizer model, only used if --tokenizer is sentencepiece", +) + +parser.add_argument("--train_file_prefix", default='train', type=str) +parser.add_argument("--eval_file_prefix", default='test', type=str) + +parser.add_argument("--num_epochs", default=10, type=int) parser.add_argument("--batch_size", default=128, type=int) parser.add_argument("--max_seq_length", default=50, type=int) parser.add_argument("--num_gpus", default=1, type=int) -parser.add_argument("--num_epochs", default=10, type=int) -parser.add_argument("--num_train_samples", default=-1, type=int) -parser.add_argument("--num_eval_samples", default=-1, type=int) -parser.add_argument("--lr_warmup_proportion", default=0.1, type=float) + +parser.add_argument("--optimizer_kind", default="adam", type=str) +parser.add_argument("--amp_opt_level", default="O0", type=str, choices=["O0", "O1", "O2"]) parser.add_argument("--lr", default=2e-5, type=float) parser.add_argument("--lr_policy", 
default="WarmupAnnealing", type=str) +parser.add_argument("--lr_warmup_proportion", default=0.1, type=float) parser.add_argument("--weight_decay", default=0.01, type=float) parser.add_argument("--fc_dropout", default=0.1, type=float) + +parser.add_argument("--intent_loss_weight", default=0.6, type=float) +parser.add_argument("--class_balancing", default="regular", type=str, choices=["regular", "weighted_loss"]) +parser.add_argument( + "--do_lower_case", + action='store_true', + help="Whether to lower case the input text. True for uncased models, False for cased models. " + + "For tokenizer only applicable when tokenizer is build with vocab file", +) +parser.add_argument( + "--no_shuffle_data", action='store_false', dest="shuffle_data", help="Shuffle is enabled by default." +) + parser.add_argument("--ignore_start_end", action='store_false') parser.add_argument("--ignore_extra_tokens", action='store_false') -parser.add_argument("--pretrained_bert_model", default="bert-base-uncased", type=str) -parser.add_argument("--bert_checkpoint", default="", type=str) -parser.add_argument("--bert_config", default="", type=str) -parser.add_argument("--data_dir", default='data/nlu/atis', type=str) -parser.add_argument("--dataset_name", default='atis', type=str) -parser.add_argument("--train_file_prefix", default='train', type=str) -parser.add_argument("--eval_file_prefix", default='test', type=str) parser.add_argument("--none_slot_label", default='O', type=str) parser.add_argument("--pad_label", default=-1, type=int) -parser.add_argument("--work_dir", default='outputs', type=str) +parser.add_argument("--num_train_samples", default=-1, type=int) +parser.add_argument("--num_eval_samples", default=-1, type=int) parser.add_argument("--save_epoch_freq", default=1, type=int) parser.add_argument("--save_step_freq", default=-1, type=int) -parser.add_argument("--optimizer_kind", default="adam", type=str) -parser.add_argument("--amp_opt_level", default="O0", type=str, choices=["O0", "O1", "O2"]) -parser.add_argument("--do_lower_case", action='store_true') -parser.add_argument("--shuffle_data", action='store_true') -parser.add_argument("--intent_loss_weight", default=0.6, type=float) -parser.add_argument("--class_balancing", default="regular", type=str, choices=["regular", "weighted_loss"]) +parser.add_argument("--local_rank", default=None, type=int) args = parser.parse_args() if not os.path.exists(args.data_dir): raise ValueError(f'Data not found at {args.data_dir}') -work_dir = f'{args.work_dir}/{args.dataset_name.upper()}' nf = nemo.core.NeuralModuleFactory( backend=nemo.core.Backend.PyTorch, local_rank=args.local_rank, optimization_level=args.amp_opt_level, - log_dir=work_dir, + log_dir=args.work_dir, + checkpoint_dir=args.checkpoint_dir, create_tb_writer=True, files_to_copy=[__file__], add_time_to_log_dir=True, ) -tokenizer = BertTokenizer.from_pretrained(args.pretrained_bert_model) +model = nemo_nlp.nm.trainables.get_pretrained_lm_model( + pretrained_model_name=args.pretrained_model_name, + config=args.bert_config, + vocab=args.vocab_file, + checkpoint=args.bert_checkpoint, +) -""" Load the pretrained BERT parameters -See the list of pretrained models, call: -nemo_nlp.huggingface.BERT.list_pretrained_models() -""" -if args.bert_checkpoint and args.bert_config: - pretrained_bert_model = nemo.collections.nlp.nm.trainables.huggingface.BERT(config_filename=args.bert_config) - pretrained_bert_model.restore_from(args.bert_checkpoint) -else: - pretrained_bert_model = 
nemo.collections.nlp.nm.trainables.huggingface.BERT( - pretrained_model_name=args.pretrained_bert_model - ) +tokenizer = nemo.collections.nlp.data.tokenizers.get_tokenizer( + tokenizer_name=args.tokenizer, + pretrained_model_name=args.pretrained_model_name, + tokenizer_model=args.tokenizer_model, + vocab_file=args.vocab_file, + do_lower_case=args.do_lower_case, +) -hidden_size = pretrained_bert_model.hidden_size +hidden_size = model.hidden_size data_desc = JointIntentSlotDataDesc( - args.data_dir, args.do_lower_case, args.dataset_name, args.none_slot_label, args.pad_label + data_dir=args.data_dir, none_slot_label=args.none_slot_label, pad_label=args.pad_label ) # Create sentence classification loss on top -classifier = nemo.collections.nlp.nm.trainables.joint_intent_slot.joint_intent_slot_nm.JointIntentSlotClassifier( +classifier = JointIntentSlotClassifier( hidden_size=hidden_size, num_intents=data_desc.num_intents, num_slots=data_desc.num_slots, dropout=args.fc_dropout ) if args.class_balancing == 'weighted_loss': - # Using weighted loss will enable weighted loss for both intents and slots - # Use the intent_loss_weight hyperparameter to adjust intent loss to - # prevent overfitting or underfitting. - loss_fn = nemo_nlp.nm.losses.JointIntentSlotLoss( - num_slots=data_desc.num_slots, - slot_classes_loss_weights=data_desc.slot_weights, - intent_classes_loss_weights=data_desc.intent_weights, - intent_loss_weight=args.intent_loss_weight, - ) + # To tackle imbalanced classes, you may use weighted loss + intent_loss_fn = CrossEntropyLossNM(logits_ndim=2, weight=data_desc.intent_weights) + slot_loss_fn = CrossEntropyLossNM(logits_ndim=3, weight=data_desc.slot_weights) else: - loss_fn = nemo_nlp.nm.losses.JointIntentSlotLoss(num_slots=data_desc.num_slots) + intent_loss_fn = CrossEntropyLossNM(logits_ndim=2) + slot_loss_fn = CrossEntropyLossNM(logits_ndim=3) + +total_loss_fn = LossAggregatorNM(num_inputs=2, weights=[args.intent_loss_weight, 1.0 - args.intent_loss_weight]) -def create_pipeline(num_samples=-1, batch_size=32, num_gpus=1, local_rank=0, mode='train'): - logging.info(f"Loading {mode} data...") - data_file = f'{data_desc.data_dir}/{mode}.tsv' - slot_file = f'{data_desc.data_dir}/{mode}_slots.tsv' - shuffle = args.shuffle_data if mode == 'train' else False +def create_pipeline(num_samples=-1, batch_size=32, data_prefix='train', is_training=True, num_gpus=1): + logging.info(f"Loading {data_prefix} data...") + data_file = f'{data_desc.data_dir}/{data_prefix}.tsv' + slot_file = f'{data_desc.data_dir}/{data_prefix}_slots.tsv' + shuffle = args.shuffle_data if is_training else False - data_layer = nemo.collections.nlp.nm.data_layers.joint_intent_slot_datalayer.BertJointIntentSlotDataLayer( + data_layer = BertJointIntentSlotDataLayer( input_file=data_file, slot_file=slot_file, pad_label=data_desc.pad_label, @@ -136,12 +176,13 @@ def create_pipeline(num_samples=-1, batch_size=32, num_gpus=1, local_rank=0, mod batch_size=batch_size, ignore_extra_tokens=args.ignore_extra_tokens, ignore_start_end=args.ignore_start_end, + do_lower_case=args.do_lower_case, ) - (ids, type_ids, input_mask, loss_mask, subtokens_mask, intents, slots) = data_layer() + input_data = data_layer() data_size = len(data_layer) - print(f'The length of data layer is {data_size}') + logging.info(f'The length of data layer is {data_size}') if data_size < batch_size: logging.warning("Batch_size is larger than the dataset size") @@ -151,61 +192,79 @@ def create_pipeline(num_samples=-1, batch_size=32, num_gpus=1, local_rank=0, mod 
steps_per_epoch = math.ceil(data_size / (batch_size * num_gpus)) logging.info(f"Steps_per_epoch = {steps_per_epoch}") - hidden_states = pretrained_bert_model(input_ids=ids, token_type_ids=type_ids, attention_mask=input_mask) + hidden_states = model( + input_ids=input_data.input_ids, token_type_ids=input_data.input_type_ids, attention_mask=input_data.input_mask + ) intent_logits, slot_logits = classifier(hidden_states=hidden_states) - loss = loss_fn( - intent_logits=intent_logits, slot_logits=slot_logits, loss_mask=loss_mask, intents=intents, slots=slots - ) + intent_loss = intent_loss_fn(logits=intent_logits, labels=input_data.intents) + slot_loss = slot_loss_fn(logits=slot_logits, labels=input_data.slots, loss_mask=input_data.loss_mask) + total_loss = total_loss_fn(loss_1=intent_loss, loss_2=slot_loss) - if mode == 'train': - tensors_to_evaluate = [loss, intent_logits, slot_logits] + if is_training: + tensors_to_evaluate = [total_loss, intent_loss, slot_loss] else: - tensors_to_evaluate = [intent_logits, slot_logits, intents, slots, subtokens_mask] + tensors_to_evaluate = [ + intent_logits, + slot_logits, + input_data.intents, + input_data.slots, + input_data.subtokens_mask, + ] - return tensors_to_evaluate, loss, steps_per_epoch, data_layer + return tensors_to_evaluate, total_loss, steps_per_epoch, data_layer -train_tensors, train_loss, steps_per_epoch, _ = create_pipeline( - args.num_train_samples, +train_tensors, train_loss, train_steps_per_epoch, _ = create_pipeline( + num_samples=args.num_train_samples, batch_size=args.batch_size, + data_prefix=args.train_file_prefix, + is_training=True, num_gpus=args.num_gpus, - local_rank=args.local_rank, - mode=args.train_file_prefix, ) -eval_tensors, _, _, data_layer = create_pipeline( - args.num_eval_samples, +eval_tensors, _, _, eval_data_layer = create_pipeline( + num_samples=args.num_eval_samples, batch_size=args.batch_size, + data_prefix=args.eval_file_prefix, + is_training=False, num_gpus=args.num_gpus, - local_rank=args.local_rank, - mode=args.eval_file_prefix, ) # Create callbacks for train and eval modes -train_callback = nemo.core.SimpleLossLoggerCallback( +train_callback = SimpleLossLoggerCallback( tensors=train_tensors, - print_func=lambda x: str(np.round(x[0].item(), 3)), + print_func=lambda x: logging.info( + f'Total Loss:{str(round(x[0].item(), 3))}, ' + f'Intent Loss:{str(round(x[1].item(), 3))}, ' + f'Slot Tagging Loss:{str(round(x[2].item(), 3))}' + ), tb_writer=nf.tb_writer, - get_tb_values=lambda x: [["loss", x[0]]], - step_freq=steps_per_epoch, + get_tb_values=lambda x: [["total_loss", x[0]], ["intent_loss", x[1]], ["slot_loss", x[2]]], + step_freq=train_steps_per_epoch, ) eval_callback = nemo.core.EvaluatorCallback( eval_tensors=eval_tensors, - user_iter_callback=lambda x, y: eval_iter_callback(x, y, data_layer), - user_epochs_done_callback=lambda x: eval_epochs_done_callback(x, f'{nf.work_dir}/graphs'), + user_iter_callback=lambda x, y: eval_iter_callback(x, y), + user_epochs_done_callback=lambda x: eval_epochs_done_callback( + x, + intents_label_ids=data_desc.intents_label_ids, + slots_label_ids=data_desc.slots_label_ids, + graph_fold=f'{nf.work_dir}/graphs', + normalize_cm=True, + ), tb_writer=nf.tb_writer, - eval_step=steps_per_epoch, + eval_step=train_steps_per_epoch, ) # Create callback to save checkpoints -ckpt_callback = nemo.core.CheckpointCallback( +ckpt_callback = CheckpointCallback( folder=nf.checkpoint_dir, epoch_freq=args.save_epoch_freq, step_freq=args.save_step_freq ) lr_policy_fn = get_lr_policy( - 
args.lr_policy, total_steps=args.num_epochs * steps_per_epoch, warmup_ratio=args.lr_warmup_proportion + args.lr_policy, total_steps=args.num_epochs * train_steps_per_epoch, warmup_ratio=args.lr_warmup_proportion ) nf.train( diff --git a/examples/nlp/language_modeling/BERTPretrainingTutorial.ipynb b/examples/nlp/language_modeling/BERTPretrainingTutorial.ipynb index b46a87a77079..cfbe78c4a707 100644 --- a/examples/nlp/language_modeling/BERTPretrainingTutorial.ipynb +++ b/examples/nlp/language_modeling/BERTPretrainingTutorial.ipynb @@ -7,6 +7,15 @@ "### Step 1 Download and prepare data" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "DATA_DIR = 'PATH_TO_THE_DATA_DIR'" + ] + }, { "cell_type": "code", "execution_count": null, @@ -15,7 +24,7 @@ "source": [ "# This example is for demonstration purposes\n", "# Please refer to the corresponding NLP tutorial on NeMo documentation\n", - "! ../scripts/get_wkt2.sh" + "! bash get_wkt2.sh $DATA_DIR" ] }, { @@ -25,7 +34,7 @@ "outputs": [], "source": [ "# verify data is there \n", - "! ls -l data/lm/wikitext-2" + "! ls -l $DATA_DIR/wikitext-2" ] }, { @@ -35,7 +44,7 @@ "outputs": [], "source": [ "# Prepare tokenization model\n", - "! python ../scripts/create_vocab.py --train_path=data/lm/wikitext-2/train.txt" + "! python create_vocab.py --train_path=$DATA_DIR/wikitext-2/train.txt" ] }, { @@ -54,7 +63,7 @@ "import os\n", "import torch\n", "import nemo\n", - "\n", + "from nemo import logging\n", "\n", "import nemo.collections.nlp as nemo_nlp\n", "from nemo.collections.nlp.callbacks.lm_bert_callback import eval_iter_callback, \\\n", @@ -109,14 +118,7 @@ "source": [ "# tokenizer.model file was created during Step 1\n", "tokenizer = nemo_nlp.data.SentencePieceTokenizer(model_path=\"tokenizer.model\")\n", - "special_tokens = {\n", - " \"sep_token\": \"[SEP]\",\n", - " \"pad_token\": \"[PAD]\",\n", - " \"bos_token\": \"[CLS]\",\n", - " \"mask_token\": \"[MASK]\",\n", - " \"eos_token\": \"[SEP]\",\n", - " \"cls_token\": \"[CLS]\",\n", - " }\n", + "special_tokens = nemo_nlp.data.get_bert_special_tokens('bert')\n", "tokenizer.add_special_tokens(special_tokens)" ] }, @@ -155,7 +157,7 @@ " num_classes=tokenizer.vocab_size,\n", " activation=HIDDEN_ACT,\n", " log_softmax=True)\n", - "mlm_loss = nemo_nlp.nm.losses.MaskedLanguageModelingLossNM()\n", + "mlm_loss = nemo_nlp.nm.losses.SmoothedCrossEntropyLoss()\n", "\n", "# Next Sentence Prediciton Loss\n", "nsp_classifier = nemo_nlp.nm.trainables.SequenceClassifier(D_MODEL,\n", @@ -163,9 +165,9 @@ " num_layers=2,\n", " activation='tanh',\n", " log_softmax=False)\n", - "nsp_loss = nemo.backends.pytorch.common.CrossEntropyLoss()\n", + "nsp_loss = nemo.backends.pytorch.common.CrossEntropyLossNM()\n", "\n", - "bert_loss = nemo_nlp.nm.losses.LossAggregatorNM(num_inputs=2)" + "bert_loss = nemo.backends.pytorch.common.LossAggregatorNM(num_inputs=2)" ] }, { @@ -174,21 +176,22 @@ "metadata": {}, "outputs": [], "source": [ - "import os\n", "train_data_layer = nemo_nlp.nm.data_layers.BertPretrainingDataLayer(\n", " tokenizer=tokenizer,\n", - " dataset=os.path.join(\"data/lm/wikitext-2\", \"train.txt\"),\n", + " dataset=os.path.join(DATA_DIR, \"wikitext-2\", \"train.txt\"),\n", " max_seq_length=MAX_SEQ_LENGTH,\n", " mask_probability=MASK_PROBABILITY,\n", - " batch_size=BATCH_SIZE\n", + " batch_size=BATCH_SIZE,\n", + " shuffle=True\n", ")\n", "\n", "eval_data_layer = nemo_nlp.nm.data_layers.BertPretrainingDataLayer(\n", " tokenizer=tokenizer,\n", - " 
dataset=os.path.join(\"data/lm/wikitext-2\", \"valid.txt\"),\n", + " dataset=os.path.join(DATA_DIR, \"wikitext-2\", \"valid.txt\"),\n", " max_seq_length=MAX_SEQ_LENGTH,\n", " mask_probability=MASK_PROBABILITY,\n", - " batch_size=BATCH_SIZE_EVAL\n", + " batch_size=BATCH_SIZE_EVAL,\n", + " shuffle=False\n", ")" ] }, @@ -213,7 +216,7 @@ " attention_mask=input_data.input_mask)\n", "\n", "mlm_logits = mlm_classifier(hidden_states=hidden_states)\n", - "t_mlm_loss = mlm_loss(logits=mlm_logits, output_ids=input_data.output_ids, output_mask=input_data.output_mask)\n", + "t_mlm_loss = mlm_loss(logits=mlm_logits, labels=input_data.output_ids, output_mask=input_data.output_mask)\n", "\n", "nsp_logits = nsp_classifier(hidden_states=hidden_states)\n", "t_nsp_loss = nsp_loss(logits=nsp_logits, labels=input_data.labels)\n", @@ -235,7 +238,7 @@ " attention_mask=input_data_eval.input_mask)\n", "\n", "e_mlm_logits = mlm_classifier(hidden_states=e_hidden_states)\n", - "e_mlm_loss = mlm_loss(logits=e_mlm_logits, output_ids=input_data_eval.output_ids, output_mask=input_data_eval.output_mask)\n", + "e_mlm_loss = mlm_loss(logits=e_mlm_logits, labels=input_data_eval.output_ids, output_mask=input_data_eval.output_mask)\n", "\n", "e_nsp_logits = nsp_classifier(hidden_states=e_hidden_states)\n", "e_nsp_loss = nsp_loss(logits=e_nsp_logits, labels=input_data_eval.labels)\n", @@ -251,7 +254,7 @@ "source": [ "callback_loss = nemo.core.SimpleLossLoggerCallback(\n", " tensors=[loss],\n", - " print_func=lambda x: print(\"Loss: {:.3f}\".format(x[0].item())))\n", + " print_func=lambda x: logging.info(\"Loss: {:.3f}\".format(x[0].item())))\n", "\n", "train_data_size = len(train_data_layer)\n", "\n", @@ -259,7 +262,7 @@ "# train_data_size / (batch_size * batches_per_step * num_gpus)\n", "steps_per_epoch = int(train_data_size / (BATCHES_PER_STEP * BATCH_SIZE))\n", "\n", - "callback_test = nemo.core.EvaluatorCallback(\n", + "callback_eval = nemo.core.EvaluatorCallback(\n", " eval_tensors=[e_mlm_loss, e_nsp_loss],\n", " user_iter_callback=eval_iter_callback,\n", " user_epochs_done_callback=eval_epochs_done_callback,\n", @@ -270,24 +273,13 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[NeMo I 2020-02-12 12:08:08 callbacks:196] Step: 300\n", - "Loss: 6.991\n", - "[NeMo I 2020-02-12 12:08:08 callbacks:211] Step time: 0.13242316246032715 seconds\n" - ] - } - ], + "outputs": [], "source": [ "lr_policy = CosineAnnealing(NUM_EPOCHS * steps_per_epoch,\n", " warmup_ratio=LR_WARMUP_PROPORTION)\n", "neural_factory.train(tensors_to_optimize=[loss],\n", " lr_policy=lr_policy,\n", - " callbacks=[callback_loss, callback_test],\n", - " #callbacks=[callback_loss],\n", + " callbacks=[callback_loss, callback_eval],\n", " batches_per_step=BATCHES_PER_STEP,\n", " optimizer=OPTIMIZER,\n", " optimization_params={\n", @@ -298,13 +290,6 @@ " \"grad_norm_clip\": None\n", " })" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -323,7 +308,16 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.6" + "version": "3.7.3" + }, + "pycharm": { + "stem_cell": { + "cell_type": "raw", + "metadata": { + "collapsed": false + }, + "source": [] + } } }, "nbformat": 4, diff --git a/examples/nlp/language_modeling/bert_pretraining.py b/examples/nlp/language_modeling/bert_pretraining.py index 857556b89b07..c458d41ad3e4 100644 --- 
a/examples/nlp/language_modeling/bert_pretraining.py
+++ b/examples/nlp/language_modeling/bert_pretraining.py
@@ -15,11 +15,11 @@
# =============================================================================
"""
-To pretrain BERT on raw text dataset run
+To pretrain BERT on raw uncased text dataset run
python bert_pretraining.py \
--amp_opt_level "O0" \
---data_dir data/lm/wikitext-2 \
---dataset_name wikitext-2 \
+--train_data path_to/wikitext-2/train.txt \
+--eval_data path_to/wikitext-2/valid.txt \
--work_dir outputs/bert_lm \
--batch_size 64 \
--lr 0.01 \
@@ -36,27 +36,28 @@
--num_attention_heads 12 \
--hidden_act "gelu" \
--save_step_freq 200 \
+data_text \
+--dataset_name wikitext-2 \
--num_epochs 10 \
--sample_size 10000000 \
--mask_probability 0.15 \
---short_seq_prob 0.1
+--short_seq_prob 0.1 \

To pretrain BERT large on preprocessed dataset, download and preprocess dataset from here:
https://github.com/NVIDIA/DeepLearningExamples/blob/master/PyTorch/LanguageModeling/BERT/
Run the script: ./data/create_datasets_from_start.sh
-and extract data into data_dir
+and extract data into train_data and eval_data

-Then run BERT large on the 512 sequence length dataset
+Then run BERT large on a dataset with a sequence length of 512 and a maximum of 80 masked tokens per sequence
python -m torch.distributed.launch --nproc_per_node=8 bert_pretraining.py \
--batch_size 8 \
--config_file bert_config.json
---data_dir data_dir \
+--train_data train_data \
+--eval_data eval_data \
--save_step_freq 200 \
---max_steps 1142857 \
--num_gpus 8 \
---batches_per_step 2 \
--amp_opt_level "O1" \
--lr_policy SquareRootAnnealing \
--beta1 0.9 \
@@ -64,16 +65,23 @@
--lr_warmup_proportion 0.01 \
--optimizer adam_w \
--weight_decay 0.01 \
---lr 0.875e-4 \
---preprocessed_data
+--lr 0.4375e-4 \
+data_preprocessed \
+--max_predictions_per_seq 80 \
+--num_iters 2285714

-350000 iterations on a DGX1 with 8 V100 32GB GPUs with AMP O1 optimization
-should finish under 5 days and yield an MRPC score of ACC/F1 85.05/89.35.
+BERT base uncased trained with 2285714 iterations on a DGX1 with 8 V100 GPUs with AMP O1 optimization
+should finish in 200 hours and yield EM/F1 of 82.74/89.79 on SQuADv1.1 and 71.24/74.32 on SQuADv2.0.
+On GLUE benchmark MRPC task the model achieves accuracy/F1 of 86.52/90.53.
+
+BERT large uncased trained with 2285714 iterations on a DGX1 with 8 V100 GPUs with AMP O1 optimization
+should finish in 410 hours and yield EM/F1 of 85.79/92.28 on SQuADv1.1 and 80.17/83.32 on SQuADv2.0.
+On GLUE benchmark MRPC task the model achieves accuracy/F1 of 88.7/91.96.
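For orientation, the iteration counts above follow directly from the dataset size and the effective batch size. A minimal sketch of that arithmetic is below; it mirrors the steps_per_epoch formula used later in this script, and the 10,000,000-sequence dataset size and batches_per_step=1 are only assumed example values:

import math

def steps_per_epoch(num_samples, batch_size, num_gpus, batches_per_step):
    # One optimizer step consumes batch_size * num_gpus * batches_per_step sequences.
    return math.ceil(num_samples / (batch_size * num_gpus * batches_per_step))

# Assumed example: 10M training sequences with the BERT-large flags shown above.
spe = steps_per_epoch(num_samples=10_000_000, batch_size=8, num_gpus=8, batches_per_step=1)
print(spe)            # optimizer steps per epoch under these assumptions
print(2285714 / spe)  # approximate number of epochs implied by --num_iters 2285714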
More information about BERT pretraining can be found at https://nvidia.github.io/NeMo/nlp/bert_pretraining.html -Pretrained BERT models can be found at +Pretrained BERT models and model configuration files can be found at https://ngc.nvidia.com/catalog/models/nvidia:bertlargeuncasedfornemo https://ngc.nvidia.com/catalog/models/nvidia:bertbaseuncasedfornemo https://ngc.nvidia.com/catalog/models/nvidia:bertbasecasedfornemo @@ -82,10 +90,12 @@ import argparse import math import os +import sys from transformers import BertConfig import nemo.backends.pytorch.common as nemo_common +import nemo.backends.pytorch.common.losses import nemo.collections.nlp as nemo_nlp import nemo.core as nemo_core from nemo import logging @@ -93,58 +103,138 @@ from nemo.utils.lr_policies import get_lr_policy parser = argparse.ArgumentParser(description='BERT pretraining') -parser.add_argument("--local_rank", default=None, type=int) -parser.add_argument("--num_gpus", default=1, type=int) -parser.add_argument("--num_epochs", default=10, type=int) -parser.add_argument("--batch_size", default=64, type=int) -parser.add_argument("--batches_per_step", default=1, type=int) -parser.add_argument("--lr", default=0.01, type=float) -parser.add_argument("--lr_policy", default=None, type=str) -parser.add_argument("--lr_warmup_proportion", default=0.05, type=float) -parser.add_argument("--optimizer", default="novograd", type=str) -parser.add_argument("--beta1", default=0.95, type=float) -parser.add_argument("--beta2", default=0.25, type=float) -parser.add_argument("--amp_opt_level", default="O0", type=str, choices=["O0", "O1", "O2"]) -parser.add_argument("--weight_decay", default=0.0, type=float) -parser.add_argument("--tokenizer", default="sentence-piece", type=str, choices=["sentence-piece", "nemo-bert"]) +parser.add_argument( + "--local_rank", default=None, type=int, help="Automatically set when using Multi-GPU with torch.distributed." +) +parser.add_argument("--num_gpus", default=1, type=int, help="Number of GPUs to use.") +parser.add_argument("--train_data", required=True, type=str, help="path to training dataset.") +parser.add_argument("--config_file", default=None, type=str, help="The BERT model config") +parser.add_argument("--eval_data", required=True, type=str, help="path to evaluation dataset.") +parser.add_argument("--batch_size", default=64, type=int, help="Batch size per worker for each model pass.") +parser.add_argument( + "--batches_per_step", + default=1, + type=int, + help="Number of gradient accumulation steps per iteration before parameters are updated.", +) +parser.add_argument("--lr", default=0.01, type=float, help="Initial learning rate.") +parser.add_argument( + "--lr_policy", + default=None, + type=str, + choices=[ + "WarmupHoldPolicy", + "SquareAnnealing", + "SquareRootAnnealing", + "CosineAnnealing", + "WarmupAnnealing", + "InverseSquareRootAnnealing", + "PolynomialDecayAnnealing", + "PolynomialHoldDecayAnnealing", + ], + help="Learning rate policy.", +) +parser.add_argument( + "--lr_warmup_proportion", default=0.05, type=float, help="Warm up proportion of total training iterations." +) +parser.add_argument( + "--optimizer", + default="novograd", + type=str, + choices=["novograd", "adam", "sgd", "adam_w", "fused_novograd", "fused_adam", "fused_lamb"], + help="Optimizer algorithm for training.", +) +parser.add_argument( + "--beta1", + default=0.95, + type=float, + help="Only needed for specific optimizers. Exponential decay rates for the 1st moment of optimizers, e.g. 
*adam*, *novograd*, *lamb*.", +) +parser.add_argument( + "--beta2", + default=0.25, + type=float, + help="Only needed for specific optimizers. Exponential decay rates for the 2nd moment of optimizers, e.g. *adam*, *novograd*, *lamb*.", +) +parser.add_argument( + "--amp_opt_level", + default="O0", + type=str, + choices=["O0", "O1", "O2"], + help="Automatic Mixed Precision optimization level. For further information visit https://nvidia.github.io/apex/amp.html.", +) +parser.add_argument("--weight_decay", default=0.0, type=float, help="Weight decay parameter of the optimizer.") parser.add_argument("--max_seq_length", default=128, type=int) -parser.add_argument("--sample_size", default=1e7, type=int) -parser.add_argument("--mask_probability", default=0.15, type=float) -parser.add_argument("--short_seq_prob", default=0.1, type=float) parser.add_argument("--vocab_size", default=3200, type=int) parser.add_argument("--hidden_size", default=768, type=int) parser.add_argument("--intermediate_size", default=3072, type=int) -parser.add_argument("--num_hidden_layers", default=12, type=int) parser.add_argument("--num_attention_heads", default=12, type=int) +parser.add_argument("--num_hidden_layers", default=12, type=int) parser.add_argument("--hidden_act", default="gelu", type=str) +parser.add_argument("--gradient_predivide", action="store_true", default=False, help="use gradient predivide") +parser.add_argument("--only_mlm_loss", action="store_true", default=False, help="use only masked language model loss") parser.add_argument( - "--max_predictions_per_seq", - default=20, - type=int, - help="maximum number of masked tokens to predict,\ - needed when --preprocessed_data is specified", + "--load_dir", + default=None, + type=str, + help="Directory with weights and optimizer checkpoints. Used for resuming training.", ) -parser.add_argument("--data_dir", default="data/lm/wikitext-2", type=str) parser.add_argument( - "--preprocessed_data", action="store_true", default=False, help="specify if using preprocessed data" + "--bert_checkpoint", + default=None, + type=str, + help="Path to BERT encoder weights file. Used for encoder initialization for finetuning.", ) -parser.add_argument("--gradient_predivide", action="store_true", default=False, help="use gradient predivide") -parser.add_argument("--only_mlm_loss", action="store_true", default=False, help="use only masked language model loss") parser.add_argument( - "--max_steps", - default=-1, + "--work_dir", default="outputs/bert_lm", type=str, help="Output directory for checkpoints, logs etc." 
+) +parser.add_argument("--grad_norm_clip", type=float, default=-1, help="gradient clipping") +parser.add_argument("--save_epoch_freq", default=1, type=int, help="Save checkpoints every given epoch.") +parser.add_argument("--save_step_freq", default=100, type=int, help="Save checkpoints every given iteration.") +parser.add_argument("--train_step_freq", default=25, type=int, help="Print training metrics every given iteration.") +parser.add_argument("--eval_step_freq", default=25, type=int, help="Print evaluation metrics every given iteration.") +sub_parsers = parser.add_subparsers() +parser_text = sub_parsers.add_parser('data_text', help='Training starting with raw text data.') +group = parser_text.add_mutually_exclusive_group() +group.add_argument("--num_epochs", default=10, type=int, help="Number of training epochs.") +group.add_argument("--num_iters", default=-1, type=int, help="Number of training steps.") +parser_text.add_argument("--sample_size", default=1e7, type=int, help="Data sample size.") +parser_text.add_argument( + "--mask_probability", + default=0.15, + type=float, + help="Probability of masking a token in the input text during data processing.", +) +parser_text.add_argument( + "--short_seq_prob", + default=0.1, + type=float, + help="Probability of having a sequence shorter than the maximum sequence length `max_seq_length` in data processing.", +) +parser_text.add_argument( + "--dataset_name", default="wikitext-2", choices=["wikitext-2"], type=str, help="Dataset name." +) +parser_text.add_argument( + "--tokenizer", + default="sentence-piece", + type=str, + choices=["sentence-piece"] + + [_.pretrained_model_name for _ in nemo_nlp.nm.trainables.huggingface.BERT.list_pretrained_models()], + help="Text tokenizer type.", +) +parser_preprocessed = sub_parsers.add_parser( + 'data_preprocessed', help='Training starting with already preprocessed data.' +) +parser_preprocessed.add_argument( + "--max_predictions_per_seq", + default=20, type=int, - help="if specified overrides --num_epochs.\ - Used for preprocessed data", + help="Maximum number of masked tokens to predict. 
Need to match the number of masked tokens in the input data sets.", ) -parser.add_argument("--dataset_name", default="wikitext-2", type=str) -parser.add_argument("--load_dir", default=None, type=str) -parser.add_argument("--bert_checkpoint", default=None, type=str, help="specify path to pretrained BERT weights") -parser.add_argument("--work_dir", default="outputs/bert_lm", type=str) -parser.add_argument("--save_epoch_freq", default=1, type=int) -parser.add_argument("--save_step_freq", default=100, type=int) -parser.add_argument("--print_step_freq", default=25, type=int) -parser.add_argument("--config_file", default=None, type=str, help="The BERT model config") +parser_preprocessed.add_argument( + "--num_iters", default=100, type=int, help="Number of training steps.", +) + args = parser.parse_args() nf = nemo_core.NeuralModuleFactory( @@ -167,27 +257,25 @@ args.hidden_act = config['hidden_act'] args.max_seq_length = config['max_position_embeddings'] -if not args.preprocessed_data: - special_tokens = nemo_nlp.utils.MODEL_SPECIAL_TOKENS['bert'] +if 'data_text' in sys.argv: + special_tokens = nemo_nlp.data.get_bert_special_tokens('bert') + data_desc = BERTPretrainingDataDesc( args.dataset_name, - args.data_dir, - args.vocab_size, - args.sample_size, - list(set(special_tokens.values())), - 'train.txt', + train_data=args.train_data, + eval_data=args.eval_data, + vocab_size=args.vocab_size, + sample_size=args.sample_size, + special_tokens=list(set(special_tokens.values())), ) if args.tokenizer == "sentence-piece": logging.info("To use SentencePieceTokenizer.") tokenizer = nemo_nlp.data.SentencePieceTokenizer( model_path=data_desc.tokenizer_model, special_tokens=special_tokens ) - elif args.tokenizer == "nemo-bert": - logging.info("To use NemoBertTokenizer.") - # To train on a Chinese dataset, use NemoBertTokenizer - tokenizer = nemo_nlp.data.NemoBertTokenizer(pretrained_model="bert-base-uncased") else: - raise ValueError("Please add your tokenizer " "or use sentence-piece or nemo-bert.") + logging.info("Using Huggingface BERT tokenizer.") + tokenizer = nemo_nlp.data.NemoBertTokenizer(pretrained_model=args.tokenizer) args.vocab_size = tokenizer.vocab_size @@ -208,21 +296,21 @@ data layers, BERT encoder, and MLM and NSP loss functions """ -mlm_classifier = nemo_nlp.nm.trainables.token_classification_nm.BertTokenClassifier( +mlm_classifier = nemo_nlp.nm.trainables.BertTokenClassifier( args.hidden_size, num_classes=args.vocab_size, activation=args.hidden_act, log_softmax=True ) -mlm_loss_fn = nemo_nlp.nm.losses.MaskedLanguageModelingLossNM() +mlm_loss_fn = nemo_nlp.nm.losses.SmoothedCrossEntropyLoss() if not args.only_mlm_loss: - nsp_classifier = nemo_nlp.nm.trainables.sequence_classification_nm.SequenceClassifier( + nsp_classifier = nemo_nlp.nm.trainables.SequenceClassifier( args.hidden_size, num_classes=2, num_layers=2, activation='tanh', log_softmax=False ) - nsp_loss_fn = nemo_common.CrossEntropyLoss() + nsp_loss_fn = nemo_common.CrossEntropyLossNM() - bert_loss = nemo_nlp.nm.losses.LossAggregatorNM(num_inputs=2) + bert_loss = nemo.backends.pytorch.common.losses.LossAggregatorNM(num_inputs=2) # tie weights of MLM softmax layer and embedding layer of the encoder if mlm_classifier.mlp.last_linear_layer.weight.shape != bert_model.bert.embeddings.word_embeddings.weight.shape: - raise ValueError("Final classification layer does not match embedding " "layer.") + raise ValueError("Final classification layer does not match embedding layer.") # mlm_classifier.mlp.last_linear_layer.weight = 
bert_model.bert.embeddings.word_embeddings.weight mlm_classifier.tie_weights_with( bert_model, @@ -241,12 +329,18 @@ def create_pipeline(data_file, batch_size, preprocessed_data=False, batches_per_ kwargs['short_seq_prob'], ) data_layer = nemo_nlp.nm.data_layers.BertPretrainingDataLayer( - tokenizer, data_file, max_seq_length, mask_probability, short_seq_prob, batch_size=batch_size + tokenizer, + data_file, + max_seq_length, + mask_probability, + short_seq_prob, + batch_size=batch_size, + shuffle=kwargs['mode'] == "train", ) else: - training, max_predictions_per_seq = (kwargs['training'], kwargs['max_predictions_per_seq']) + mode, max_predictions_per_seq = (kwargs['mode'], kwargs['max_predictions_per_seq']) data_layer = nemo_nlp.nm.data_layers.BertPretrainingPreprocessedDataLayer( - data_file, max_predictions_per_seq, batch_size=batch_size, training=training + data_file, max_predictions_per_seq, batch_size=batch_size, mode=mode, ) steps_per_epoch = math.ceil(len(data_layer) / (batch_size * args.num_gpus * batches_per_step)) @@ -256,7 +350,7 @@ def create_pipeline(data_file, batch_size, preprocessed_data=False, batches_per_ input_ids=input_data.input_ids, token_type_ids=input_data.input_type_ids, attention_mask=input_data.input_mask ) mlm_logits = mlm_classifier(hidden_states=hidden_states) - mlm_loss = mlm_loss_fn(logits=mlm_logits, output_ids=input_data.output_ids, output_mask=input_data.output_mask) + mlm_loss = mlm_loss_fn(logits=mlm_logits, labels=input_data.output_ids, output_mask=input_data.output_mask) if not args.only_mlm_loss: nsp_logits = nsp_classifier(hidden_states=hidden_states) nsp_loss = nsp_loss_fn(logits=nsp_logits, labels=input_data.labels) @@ -267,7 +361,7 @@ def create_pipeline(data_file, batch_size, preprocessed_data=False, batches_per_ return loss, mlm_loss, nsp_loss, steps_per_epoch -if not args.preprocessed_data: +if 'data_text' in sys.argv: train_loss, mlm_loss, nsp_loss, steps_per_epoch = create_pipeline( data_file=data_desc.train_file, preprocessed_data=False, @@ -276,14 +370,33 @@ def create_pipeline(data_file, batch_size, preprocessed_data=False, batches_per_ short_seq_prob=args.short_seq_prob, batch_size=args.batch_size, batches_per_step=args.batches_per_step, + mode="train", + ) + eval_loss, eval_mlm_loss, eval_nsp_loss, eval_steps_per_epoch = create_pipeline( + data_file=data_desc.eval_file, + preprocessed_data=False, + max_seq_length=args.max_seq_length, + mask_probability=args.mask_probability, + short_seq_prob=args.short_seq_prob, + batch_size=args.batch_size, + batches_per_step=args.batches_per_step, + mode="eval", ) else: max_pred_len = args.max_predictions_per_seq train_loss, mlm_loss, nsp_loss, steps_per_epoch = create_pipeline( - data_file=args.data_dir, + data_file=args.train_data, + preprocessed_data=True, + max_predictions_per_seq=max_pred_len, + mode="train", + batch_size=args.batch_size, + batches_per_step=args.batches_per_step, + ) + eval_loss, eval_mlm_loss, eval_nsp_loss, eval_steps_per_epoch = create_pipeline( + data_file=args.eval_data, preprocessed_data=True, max_predictions_per_seq=max_pred_len, - training=True, + mode="eval", batch_size=args.batch_size, batches_per_step=args.batches_per_step, ) @@ -298,7 +411,7 @@ def create_pipeline(data_file, batch_size, preprocessed_data=False, batches_per_ print_msg = "Loss: {:.3f}" train_callback = nemo_core.SimpleLossLoggerCallback( tensors=log_tensors, - step_freq=args.print_step_freq, + step_freq=args.train_step_freq, print_func=lambda x: logging.info(print_msg.format(*[y.item() for y in x])), 
get_tb_values=lambda x: [["loss", x[0]]], tb_writer=nf.tb_writer, @@ -311,15 +424,22 @@ def create_pipeline(data_file, batch_size, preprocessed_data=False, batches_per_ step_freq=args.save_step_freq, ) +eval_callback = nemo.core.EvaluatorCallback( + eval_tensors=[eval_loss], + user_iter_callback=nemo_nlp.callbacks.lm_bert_callback.eval_iter_callback, + user_epochs_done_callback=nemo_nlp.callbacks.lm_bert_callback.eval_epochs_done_callback, + eval_step=args.eval_step_freq, +) + # define learning rate decay policy if args.lr_policy is not None: - if args.max_steps < 0: + if args.num_iters < 0: lr_policy_fn = get_lr_policy( args.lr_policy, total_steps=args.num_epochs * steps_per_epoch, warmup_ratio=args.lr_warmup_proportion ) else: lr_policy_fn = get_lr_policy( - args.lr_policy, total_steps=args.max_steps, warmup_ratio=args.lr_warmup_proportion + args.lr_policy, total_steps=args.num_iters, warmup_ratio=args.lr_warmup_proportion ) else: lr_policy_fn = None @@ -330,20 +450,27 @@ def create_pipeline(data_file, batch_size, preprocessed_data=False, batches_per_ # define and launch training algorithm (optimizer) optimization_params = { - "batch_size": args.batch_size, "lr": args.lr, "betas": (args.beta1, args.beta2), "weight_decay": args.weight_decay, } -if args.max_steps < 0: +if args.num_iters < 0: optimization_params['num_epochs'] = args.num_epochs else: - optimization_params['max_steps'] = args.max_steps + optimization_params['max_steps'] = args.num_iters + +if args.grad_norm_clip >= 0: + optimization_params['grad_norm_clip'] = args.grad_norm_clip +call_backs = [train_callback, ckpt_callback, eval_callback] + +if 'data_preprocessed' in sys.argv: + call_backs = [train_callback, ckpt_callback] + nf.train( tensors_to_optimize=[train_loss], lr_policy=lr_policy_fn, - callbacks=[train_callback, ckpt_callback], + callbacks=call_backs, optimizer=args.optimizer, batches_per_step=args.batches_per_step, gradient_predivide=args.gradient_predivide, diff --git a/examples/nlp/scripts/create_vocab.py b/examples/nlp/language_modeling/create_vocab.py similarity index 100% rename from examples/nlp/scripts/create_vocab.py rename to examples/nlp/language_modeling/create_vocab.py diff --git a/examples/nlp/scripts/get_wkt2.sh b/examples/nlp/language_modeling/get_wkt2.sh similarity index 63% rename from examples/nlp/scripts/get_wkt2.sh rename to examples/nlp/language_modeling/get_wkt2.sh index 206160bf8cd8..75efd08722e4 100755 --- a/examples/nlp/scripts/get_wkt2.sh +++ b/examples/nlp/language_modeling/get_wkt2.sh @@ -4,12 +4,12 @@ This file is adapted from https://github.com/salesforce/awd-lstm-lm/blob/master/getdata.sh Copyright by the AWD LSTM authors. """ - +DATA_DIR=$1 echo "- Downloading WikiText-2" -wget --continue -P data/lm/ https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip -unzip -q data/lm/wikitext-2-v1.zip -d data/lm -cd data/lm/wikitext-2 +wget --continue -P $DATA_DIR https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip +unzip -q $DATA_DIR/wikitext-2-v1.zip -d $DATA_DIR +cd $DATA_DIR/wikitext-2 mv wiki.train.tokens train.txt sed -i -e "s//[UNK]/g" train.txt mv wiki.valid.tokens valid.txt @@ -18,3 +18,5 @@ mv wiki.test.tokens test.txt sed -i -e "s//[UNK]/g" test.txt cd .. 
rm wikitext-2-v1.zip + +echo "- WikiText-2 saved at $DATA_DIR/wikitext-2" diff --git a/examples/nlp/language_modeling/language_modeling_transformer.py b/examples/nlp/language_modeling/language_modeling_transformer.py index 2572b90af785..2e2630d47679 100644 --- a/examples/nlp/language_modeling/language_modeling_transformer.py +++ b/examples/nlp/language_modeling/language_modeling_transformer.py @@ -17,10 +17,12 @@ import nemo import nemo.collections.nlp as nemo_nlp -import nemo.collections.nlp.nm.data_layers.lm_transformer_datalayer -import nemo.collections.nlp.nm.trainables.common.token_classification_nm +from nemo import logging from nemo.collections.nlp.callbacks.lm_transformer_callback import eval_epochs_done_callback, eval_iter_callback from nemo.collections.nlp.data.datasets.lm_transformer_dataset import LanguageModelDataDesc +from nemo.collections.nlp.nm.data_layers import LanguageModelingDataLayer +from nemo.collections.nlp.nm.losses import SmoothedCrossEntropyLoss +from nemo.collections.nlp.nm.trainables.common import TokenClassifier from nemo.core import WeightShareTransform from nemo.utils.lr_policies import CosineAnnealing @@ -105,13 +107,9 @@ max_seq_length=args.max_seq_length, ) -log_softmax = nemo.collections.nlp.nm.trainables.common.token_classification_nm.TokenClassifier( - args.d_model, num_classes=vocab_size, num_layers=1, log_softmax=True -) +log_softmax = TokenClassifier(args.d_model, num_classes=vocab_size, num_layers=1, log_softmax=True) -loss = nemo_nlp.nm.losses.PaddedSmoothedCrossEntropyLossNM( - pad_id=tokenizer.pad_id, label_smoothing=args.label_smoothing -) +loss = SmoothedCrossEntropyLoss(pad_id=tokenizer.pad_id, label_smoothing=args.label_smoothing) # tie weight of embedding and log_softmax layers # log_softmax.mlp.last_linear_layer.weight = encoder.embedding_layer.token_embedding.weight @@ -127,13 +125,11 @@ def create_pipeline( dataset, max_seq_length=args.max_seq_length, batch_step=args.max_seq_length, batch_size=args.batch_size ): - data_layer = nemo.collections.nlp.nm.data_layers.lm_transformer_datalayer.LanguageModelingDataLayer( - dataset, tokenizer, max_seq_length, batch_size, batch_step - ) - src, src_mask, labels = data_layer() - src_hiddens = encoder(input_ids=src, input_mask_src=src_mask) + data_layer = LanguageModelingDataLayer(dataset, tokenizer, max_seq_length, batch_size, batch_step) + input_data = data_layer() + src_hiddens = encoder(input_ids=input_data.input_ids, input_mask_src=input_data.input_mask) logits = log_softmax(hidden_states=src_hiddens) - return loss(logits=logits, target_ids=labels) + return loss(logits=logits, labels=input_data.labels) train_loss = create_pipeline( @@ -153,7 +149,7 @@ def create_pipeline( train_callback = nemo.core.SimpleLossLoggerCallback( tensors=[train_loss], step_freq=100, - print_func=lambda x: str(x[0].item()), + print_func=lambda x: logging.info(str(x[0].item())), get_tb_values=lambda x: [["loss", x[0]]], tb_writer=nf.tb_writer, ) diff --git a/examples/nlp/scripts/process_wiki_zh.py b/examples/nlp/language_modeling/process_wiki_zh.py similarity index 98% rename from examples/nlp/scripts/process_wiki_zh.py rename to examples/nlp/language_modeling/process_wiki_zh.py index 9001fe42c1b4..dceae626b33a 100755 --- a/examples/nlp/scripts/process_wiki_zh.py +++ b/examples/nlp/language_modeling/process_wiki_zh.py @@ -139,7 +139,7 @@ def process(data_dir, output_dir=None, min_frequency=3, max_files=-1): parser.add_argument("--data_dir", default="/raid/data/wiki_zh", type=str) parser.add_argument("--output_dir", 
default="./", type=str) parser.add_argument( - "--min_frequency", default=0, type=int, help="Characters occuring less frequently " "will be filtered out" + "--min_frequency", default=0, type=int, help="Characters occuring less frequently will be filtered out" ) parser.add_argument("--max_files", default=-1, type=int, help="Max number of dirs to process") args = parser.parse_args() diff --git a/examples/nlp/neural_machine_translation/machine_translation_tutorial.py b/examples/nlp/neural_machine_translation/machine_translation_tutorial.py index ae05afa88e32..e628758810da 100644 --- a/examples/nlp/neural_machine_translation/machine_translation_tutorial.py +++ b/examples/nlp/neural_machine_translation/machine_translation_tutorial.py @@ -23,6 +23,7 @@ import nemo import nemo.collections.nlp as nemo_nlp +from nemo import logging from nemo.collections.nlp.callbacks.machine_translation_callback import eval_epochs_done_callback, eval_iter_callback from nemo.core import WeightShareTransform from nemo.utils.lr_policies import get_lr_policy @@ -161,7 +162,7 @@ eos_token=tgt_tokenizer.eos_id, ) -loss_fn = nemo_nlp.nm.losses.PaddedSmoothedCrossEntropyLossNM( +loss_fn = nemo_nlp.nm.losses.SmoothedCrossEntropyLoss( pad_id=tgt_tokenizer.pad_id, label_smoothing=args.label_smoothing ) @@ -202,7 +203,7 @@ def create_pipeline(dataset_src, dataset_tgt, tokens_in_batch, clean=False, trai input_ids_tgt=tgt, hidden_states_src=src_hiddens, input_mask_src=src_mask, input_mask_tgt=tgt_mask ) logits = log_softmax(hidden_states=tgt_hiddens) - loss = loss_fn(logits=logits, target_ids=labels) + loss = loss_fn(logits=logits, labels=labels) beam_results = None if not training: beam_results = beam_search(hidden_states_src=src_hiddens, input_mask_src=src_mask) @@ -223,7 +224,7 @@ def create_pipeline(dataset_src, dataset_tgt, tokens_in_batch, clean=False, trai train_callback = nemo.core.SimpleLossLoggerCallback( tensors=[train_loss], step_freq=100, - print_func=lambda x: str(x[0].item()), + print_func=lambda x: logging.info(str(x[0].item())), get_tb_values=lambda x: [["loss", x[0]]], tb_writer=nf.tb_writer, ) diff --git a/examples/nlp/question_answering/get_bioasq.py b/examples/nlp/question_answering/get_bioasq.py new file mode 100755 index 000000000000..1a07f8f025aa --- /dev/null +++ b/examples/nlp/question_answering/get_bioasq.py @@ -0,0 +1,104 @@ +#!/bin/bash +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= +# Disclaimer: +# All the data in this repository is no longer updated since 2019.Jan.24th and it may not reflect current data available. +# +#### BioASQ +# Before using the files in this repository, you must first register BioASQ website and download the [BioASQ Task B](http://participants-area.bioasq.org/Tasks/A/getData/) data. 
+# See "An overview of the BIOASQ large-scale biomedical semantic indexing and question answering competition (Tsatsaronis et al. 2015)" for datasets details. +# +# Copyright 2019 dmis-lab/biobert. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import argparse +import logging +import os +import subprocess +from distutils.dir_util import copy_tree + +from nemo import logging + +URL = { + 'bioasq': 'https://drive.google.com/uc?id=19ft5q44W4SuptJgTwR84xZjsHg1jvjSZ', + 'bioasq_6b': 'https://drive.google.com/uc?id=1-KzAQzaE-Zd4jOlZG_7k7D4odqPI3dL1', +} + + +def download(download_url: str, parent_dir: str): + tmp_zip = '/tmp/data.zip' + tmp_unzip = '/tmp/data' + if not os.path.exists(tmp_unzip): + os.makedirs(tmp_unzip) + else: + subprocess.run(['rm', '-rf', tmp_unzip]) + subprocess.run(['gdown', '-O', tmp_zip, download_url]) + subprocess.run(['unzip', tmp_zip, '-d', tmp_unzip]) + copy_tree(tmp_unzip, parent_dir) + subprocess.run(['rm', '-rf', tmp_zip]) + subprocess.run(['rm', '-rf', tmp_unzip]) + + +def __maybe_download_file(parent_dir: str): + """ + from https://github.com/dmis-lab/biobert download https://drive.google.com/uc?id=19ft5q44W4SuptJgTwR84xZjsHg1jvjSZ + from https://github.com/dmis-lab/bioasq-biobert#datasets https://drive.google.com/uc?id=1-KzAQzaE-Zd4jOlZG_7k7D4odqPI3dL1 + + If exists, skips download + Args: + parent_dir: local filepath + """ + target_dir = os.path.join(parent_dir, 'BioASQ') + if os.path.exists(target_dir): + logging.info(f'{target_dir} found. Skipping download') + else: + download_url = URL['bioasq'] + logging.info(f'Downloading {download_url} from https://github.com/dmis-lab/biobert to {target_dir}') + download(download_url, parent_dir) + parent_dir = target_dir + target_dir = os.path.join(parent_dir, 'BioASQ-6b') + if os.path.exists(target_dir): + logging.info(f'{target_dir} found. 
Skipping download') + else: + download_url = URL['bioasq_6b'] + logging.info( + f'Downloading {download_url} from https://github.com/dmis-lab/bioasq-biobert#datasets to {target_dir}' + ) + download(download_url, parent_dir) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Prepare dataset') + parser.add_argument("--data_dir", required=True, type=str, help="directory to download dataset to") + args = parser.parse_args() + + if not os.path.exists(args.data_dir): + os.makedirs(args.data_dir) + + logging.info(f'Downloading dataset') + __maybe_download_file(args.data_dir) diff --git a/examples/nlp/scripts/get_squad.py b/examples/nlp/question_answering/get_squad.py similarity index 98% rename from examples/nlp/scripts/get_squad.py rename to examples/nlp/question_answering/get_squad.py index cc31f20eea1d..55ade6aef138 100755 --- a/examples/nlp/scripts/get_squad.py +++ b/examples/nlp/question_answering/get_squad.py @@ -46,7 +46,7 @@ def download(self): url = item file = self.download_urls[item] - logging.info('Downloading:', url) + logging.info('Downloading: %s', url) if os.path.isfile(self.save_path + '/' + file): logging.info('** Download file already exists, skipping download') else: diff --git a/examples/nlp/question_answering/question_answering_squad.py b/examples/nlp/question_answering/question_answering_squad.py index 6f7197e349aa..c23f3aa27532 100755 --- a/examples/nlp/question_answering/question_answering_squad.py +++ b/examples/nlp/question_answering/question_answering_squad.py @@ -1,72 +1,99 @@ -""" -Copyright 2018 The Google AI Language Team Authors and -The HuggingFace Inc. team. -Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# Copyright 2018 The Google AI Language Team Authors and +# The HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# =============================================================================
+"""
Some of this code was adapted from the HuggingFace library at
https://github.com/huggingface/transformers
-"""
-"""
-Download the Squad data by running the script:
-examples/nlp/scripts/get_squad.py
+Download the SQuAD data by running the script:
+examples/nlp/question_answering/get_squad.py

-To finetune Squad v1.1 on pretrained BERT large uncased on 1 GPU:
+To finetune SQuADv1.1 on pretrained BERT Base uncased on 1 GPU:
python question_answering_squad.py
--train_file /path_to_data_dir/squad/v1.1/train-v1.1.json
---dev_file /path_to_data_dir/squad/v1.1/dev-v1.1.json
+--eval_file /path_to_data_dir/squad/v1.1/dev-v1.1.json
--work_dir /path_to_output_folder
+--bert_config /path_to/bert-config.json
+--pretrained_model_name bert-base-uncased
--bert_checkpoint /path_to_bert_checkpoint
---amp_opt_level "O1"
+--amp_opt_level "O2"
--batch_size 24
--num_epochs 2
--lr_policy WarmupAnnealing
---lr_warmup_proportion 0.0
---optimizer adam_w
---weight_decay 0.0
+--optimizer fused_adam
--lr 3e-5
--do_lower_case
+--mode train_eval
+--no_data_cache

-If --bert_checkpoint is not specified, training starts from
+If --bert_checkpoint and --bert_config are not specified, training starts from
Huggingface pretrained checkpoints.

-To finetune Squad v1.1 on pretrained BERT large uncased on 8 GPU:
+To finetune SQuADv1.1 on pretrained BERT large uncased on 8 GPU change to:
python -m torch.distributed.launch --nproc_per_node=8 question_answering_squad.py
---amp_opt_level "O1"
---train_file /path_to_data_dir/squad/v1.1/train-v1.1.json
---dev_file /path_to_data_dir/squad/v1.1/dev-v1.1.json
---bert_checkpoint /path_to_bert_checkpoint
--batch_size 3
--num_gpus 8
---num_epochs 2
---lr_policy WarmupAnnealing
---lr_warmup_proportion 0.0
---optimizer adam_w
---weight_decay 0.0
---lr 3e-5
---do_lower_case
+...

-On Huggingface the final Exact Match (EM) and F1 scores are as follows:
-Model EM F1
-BERT Based uncased 80.59 88.34
-BERT Large uncased 83.88 90.65
+This takes about 18 minutes.
+
+To finetune on SQuADv2.0 which allows non-answerable questions, add the flag --version_2_with_negative. 
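Editor's note (illustrative sketch, not part of the patch): combining the flags documented above, a hypothetical SQuAD v2.0 fine-tuning command would look like
python question_answering_squad.py --mode train_eval --train_file /path_to_data_dir/squad/v2.0/train-v2.0.json --eval_file /path_to_data_dir/squad/v2.0/dev-v2.0.json --version_2_with_negative --batch_size 24 --num_epochs 2 --lr 3e-5 --do_lower_case
where the data paths are placeholders to be replaced with real file locations.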
+ +To run only evaluation on pretrained question answering checkpoints on 1 GPU with ground-truth data: +python question_answering_squad.py +--eval_file /path_to_data_dir/test.json +--checkpoint_dir /path_to_checkpoints +--mode eval + +To run only inference on pretrained question answering checkpoints on 1 GPU without ground-truth data: +python question_answering_squad.py +--test_file /path_to_data_dir/test.json +--checkpoint_dir /path_to_checkpoints +--mode test + +Finetuned SQuAD models and model configuration files can be found at +https://ngc.nvidia.com/catalog/models/nvidia:bertlargeuncasedsquadv1 +https://ngc.nvidia.com/catalog/models/nvidia:bertlargeuncasedsquadv2 +https://ngc.nvidia.com/catalog/models/nvidia:bertbaseuncasedsquadv1 +https://ngc.nvidia.com/catalog/models/nvidia:bertbaseuncasedsquadv2 + + +On BERT base uncased pretrained model +the final Exact Match (EM) and F1 scores are as follows: +Data EM F1 +SQuADv1.1 82.74 89.79 +SQuADv2.0 71.24 74.32 + + +On BERT large uncased pretrained model +the final Exact Match (EM) and F1 scores are as follows: +Data EM F1 +SQuADv1.1 85.79 92.28 +SQuADv2.0 80.17 83.32 """ import argparse import json import os +import numpy as np + import nemo.collections.nlp as nemo_nlp +import nemo.collections.nlp.data.tokenizers.tokenizer_utils import nemo.core as nemo_core from nemo import logging from nemo.collections.nlp.callbacks.qa_squad_callback import eval_epochs_done_callback, eval_iter_callback @@ -79,20 +106,29 @@ def parse_args(): "--train_file", type=str, help="The training data file. Should be *.json", ) parser.add_argument( - "--dev_file", type=str, required=True, help="The evaluation data file. Should be *.json", + "--eval_file", type=str, help="The evaluation data file. Should be *.json", + ) + parser.add_argument( + "--test_file", type=str, help="The test data file. Should be *.json. Does not need to contain ground truth", + ) + parser.add_argument( + '--pretrained_model_name', + default='roberta-base', + type=str, + help='Name of the pre-trained model', + choices=nemo_nlp.nm.trainables.get_pretrained_lm_models_list(), ) - parser.add_argument("--pretrained_model_name", type=str, help="Name of the pre-trained model") parser.add_argument("--checkpoint_dir", default=None, type=str, help="Checkpoint directory for inference.") parser.add_argument( - "--bert_checkpoint", default=None, type=str, help="Path to BERT model checkpoint for finetuning." + "--bert_checkpoint", default=None, type=str, help="Path to BERT encoder checkpoint for finetuning." ) - parser.add_argument("--bert_config", default=None, type=str, help="Path to bert config file in json format") parser.add_argument( - "--model_type", default="bert", type=str, help="model type", choices=['bert', 'roberta', 'albert'] + "--head_checkpoint", default=None, type=str, help="Path to BERT QA head checkpoint for finetuning." 
) + parser.add_argument("--bert_config", default=None, type=str, help="Path to bert config file in json format") parser.add_argument( "--tokenizer_model", - default="tokenizer.model", + default=None, type=str, help="Path to pretrained tokenizer model, only used if --tokenizer is sentencepiece", ) @@ -103,24 +139,32 @@ def parse_args(): choices=["nemobert", "sentencepiece"], help="tokenizer to use, only relevant when using custom pretrained checkpoint.", ) - parser.add_argument("--optimizer_kind", default="adam", type=str, help="Optimizer kind") + parser.add_argument("--optimizer", default="adam_w", type=str, help="Optimizer kind") + parser.add_argument("--vocab_file", default=None, help="Path to the vocab file.") parser.add_argument("--lr_policy", default="WarmupAnnealing", type=str) parser.add_argument("--lr", default=3e-5, type=float, help="The initial learning rate.") parser.add_argument("--lr_warmup_proportion", default=0.0, type=float) parser.add_argument("--weight_decay", default=0.0, type=float, help="Weight deay if we apply some.") parser.add_argument("--num_epochs", default=2, type=int, help="Total number of training epochs to perform.") + parser.add_argument("--max_steps", default=-1, type=int, help="If specified overrides --num_epochs.") parser.add_argument("--batch_size", default=8, type=int, help="Batch size per GPU/CPU for training/evaluation.") + parser.add_argument("--grad_norm_clip", type=float, default=-1, help="gradient clipping") parser.add_argument( "--do_lower_case", action='store_true', - help="Whether to lower case the input text. " "True for uncased models, False for cased models.", + help="Whether to lower case the input text. True for uncased models, False for cased models.", + ) + parser.add_argument( + "--mode", default="train_eval", choices=["train", "train_eval", "eval", "test"], help="Mode of model usage." 
+ )
+ parser.add_argument(
+ "--no_data_cache", action='store_true', help="When specified, do not load and store cached preprocessed data.",
)
- parser.add_argument("--evaluation_only", action='store_true', help="Whether to only do evaluation.")
parser.add_argument(
"--doc_stride",
default=128,
type=int,
- help="When splitting up a long document into chunks, " "how much stride to take between chunks.",
+ help="When splitting up a long document into chunks, how much stride to take between chunks.",
)
parser.add_argument(
"--max_query_length",
@@ -148,38 +192,37 @@ def parse_args():
"--work_dir",
default='output_squad',
type=str,
- help="The output directory where the " "model predictions and checkpoints " "will be written.",
+ help="The output directory where the model predictions and checkpoints will be written.",
)
parser.add_argument(
"--save_epoch_freq",
default=1,
type=int,
- help="Frequency of saving checkpoint " "'-1' - epoch checkpoint won't be saved",
+ help="Frequency of saving checkpoint '-1' - epoch checkpoint won't be saved",
)
parser.add_argument(
"--save_step_freq",
default=-1,
type=int,
- help="Frequency of saving checkpoint " "'-1' - step checkpoint won't be saved",
+ help="Frequency of saving checkpoint '-1' - step checkpoint won't be saved",
+ )
+ parser.add_argument("--train_step_freq", default=100, type=int, help="Frequency of printing training loss")
+ parser.add_argument(
+ "--eval_step_freq", default=500, type=int, help="Frequency of evaluation during training on evaluation data"
)
- parser.add_argument("--loss_step_freq", default=100, type=int, help="Frequency of printing loss")
- parser.add_argument("--eval_step_freq", default=500, type=int, help="Frequency of evaluation on dev data")
parser.add_argument(
"--version_2_with_negative",
action="store_true",
- help="If true, the SQuAD examples contain some that " "do not have an answer.",
+ help="If true, the examples contain some that do not have an answer.",
)
parser.add_argument(
'--null_score_diff_threshold',
type=float,
default=0.0,
- help="If null_score - best_non_null is " "greater than the threshold predict null.",
+ help="If null_score - best_non_null is greater than the threshold predict null.",
)
parser.add_argument(
- "--n_best_size",
- default=20,
- type=int,
- help="The total number of n-best predictions to " "generate in the nbest_predictions.json output file.",
+ "--n_best_size", default=20, type=int, help="The total number of n-best predictions to generate at testing.",
)
parser.add_argument("--batches_per_step", default=1, type=int, help="Number of iterations per step.")
parser.add_argument(
@@ -196,7 +239,14 @@ def parse_args():
type=str,
required=False,
default="predictions.json",
- help="File to write predictions to. " "Only in evaluation mode.",
+ help="File to write predictions to. Only in evaluation or test mode.",
+ )
+ parser.add_argument(
+ "--output_nbest_file",
+ type=str,
+ required=False,
+ default="nbest.json",
+ help="File to write nbest predictions to. 
Only in evaluation or test mode.", ) args = parser.parse_args() return args @@ -206,15 +256,16 @@ def create_pipeline( data_file, model, head, - loss_fn, max_query_length, max_seq_length, doc_stride, batch_size, version_2_with_negative, + mode, num_gpus=1, batches_per_step=1, - mode="train", + loss_fn=None, + use_data_cache=True, ): data_layer = nemo_nlp.nm.data_layers.BertQuestionAnsweringDataLayer( mode=mode, @@ -225,6 +276,8 @@ def create_pipeline( max_query_length=max_query_length, max_seq_length=max_seq_length, doc_stride=doc_stride, + shuffle="train" in mode, + use_cache=use_data_cache, ) input_data = data_layer() @@ -234,36 +287,46 @@ def create_pipeline( ) qa_output = head(hidden_states=hidden_states) - loss_output = loss_fn( - logits=qa_output, start_positions=input_data.start_positions, end_positions=input_data.end_positions - ) steps_per_epoch = len(data_layer) // (batch_size * num_gpus * batches_per_step) - return ( - loss_output.loss, - steps_per_epoch, - [loss_output.start_logits, loss_output.end_logits, input_data.unique_ids], - data_layer, - ) + if mode == "test": + return ( + steps_per_epoch, + [input_data.unique_ids, qa_output], + data_layer, + ) + else: + loss_output = loss_fn( + logits=qa_output, start_positions=input_data.start_positions, end_positions=input_data.end_positions + ) -MODEL_CLASSES = { - 'bert': nemo_nlp.nm.trainables.huggingface.BERT, - 'albert': nemo_nlp.nm.trainables.huggingface.Albert, - 'roberta': nemo_nlp.nm.trainables.huggingface.Roberta, -} + return ( + loss_output.loss, + steps_per_epoch, + [input_data.unique_ids, loss_output.start_logits, loss_output.end_logits], + data_layer, + ) if __name__ == "__main__": args = parse_args() - if not os.path.exists(args.dev_file): - raise FileNotFoundError( - "eval data not found. Datasets can be " "obtained using examples/nlp/scripts/get_squad.py" - ) - if not args.evaluation_only and not os.path.exists(args.train_file): - raise FileNotFoundError( - "train data not found. Datasets can be " "obtained using examples/nlp/scripts/get_squad.py" - ) + + if "train" in args.mode: + if not os.path.exists(args.train_file): + raise FileNotFoundError( + "train data not found. Datasets can be obtained using examples/nlp/question_answering/get_squad.py" + ) + if "eval" in args.mode: + if not os.path.exists(args.eval_file): + raise FileNotFoundError( + "eval data not found. Datasets can be obtained using examples/nlp/question_answering/get_squad.py" + ) + if "test" in args.mode: + if not os.path.exists(args.test_file): + raise FileNotFoundError( + "test data not found. 
Datasets can be obtained using examples/nlp/question_answering/get_squad.py" + ) # Instantiate neural factory with supported backend nf = nemo_core.NeuralModuleFactory( @@ -276,54 +339,32 @@ def create_pipeline( add_time_to_log_dir=False, ) - if args.tokenizer == "sentencepiece": - try: - tokenizer = nemo_nlp.data.SentencePieceTokenizer(model_path=args.tokenizer_model) - except Exception: - raise ValueError( - "Using --tokenizer=sentencepiece \ - requires valid --tokenizer_model" - ) - special_tokens = nemo_nlp.utils.MODEL_SPECIAL_TOKENS[args.model_type] - tokenizer.add_special_tokens(special_tokens) - else: - tokenizer_cls = nemo_nlp.data.NemoBertTokenizer - tokenizer_special_tokens = nemo_nlp.utils.MODEL_SPECIAL_TOKENS[args.model_type] - tokenizer_name = nemo_nlp.utils.MODEL_NAMES[args.model_type]["tokenizer_name"] - tokenizer = tokenizer_cls( - do_lower_case=args.do_lower_case, - pretrained_model=tokenizer_name, - special_tokens=tokenizer_special_tokens, - bert_derivate=args.model_type, - ) - - model_cls = MODEL_CLASSES[args.model_type] - model_name = nemo_nlp.utils.MODEL_NAMES[args.model_type]["model_name"] - - if args.pretrained_model_name is None: - args.pretrained_model_name = model_name + model = nemo_nlp.nm.trainables.get_pretrained_lm_model( + pretrained_model_name=args.pretrained_model_name, + config=args.bert_config, + vocab=args.vocab_file, + checkpoint=args.bert_checkpoint, + ) - if args.bert_config is not None: - with open(args.bert_config) as json_file: - config = json.load(json_file) - model = model_cls(**config) - else: - """ Use this if you're using a standard BERT model. - To see the list of pretrained models, call: - nemo_nlp.nm.trainables.huggingface.BERT.list_pretrained_models() - """ - model = model_cls(pretrained_model_name=args.pretrained_model_name) + tokenizer = nemo.collections.nlp.data.tokenizers.get_tokenizer( + tokenizer_name=args.tokenizer, + pretrained_model_name=args.pretrained_model_name, + tokenizer_model=args.tokenizer_model, + vocab_file=args.vocab_file, + do_lower_case=args.do_lower_case, + ) hidden_size = model.hidden_size qa_head = nemo_nlp.nm.trainables.TokenClassifier( - hidden_size=hidden_size, num_classes=2, num_layers=1, log_softmax=False + hidden_size=hidden_size, num_classes=2, num_layers=1, log_softmax=False, name="TokenClassifier" ) - squad_loss = nemo_nlp.nm.losses.QuestionAnsweringLoss() - if args.bert_checkpoint is not None: - model.restore_from(args.bert_checkpoint) + squad_loss = nemo_nlp.nm.losses.SpanningLoss() + + if args.head_checkpoint is not None: + qa_head.restore_from(args.head_checkpoint) - if not args.evaluation_only: + if "train" in args.mode: train_loss, train_steps_per_epoch, _, _ = create_pipeline( data_file=args.train_file, model=model, @@ -337,91 +378,146 @@ def create_pipeline( num_gpus=args.num_gpus, batches_per_step=args.batches_per_step, mode="train", + use_data_cache=not args.no_data_cache, + ) + if "eval" in args.mode: + _, _, eval_output, eval_data_layer = create_pipeline( + data_file=args.eval_file, + model=model, + head=qa_head, + loss_fn=squad_loss, + max_query_length=args.max_query_length, + max_seq_length=args.max_seq_length, + doc_stride=args.doc_stride, + batch_size=args.batch_size, + version_2_with_negative=args.version_2_with_negative, + num_gpus=args.num_gpus, + batches_per_step=args.batches_per_step, + mode="eval", + use_data_cache=not args.no_data_cache, + ) + if "test" in args.mode: + _, eval_output, test_data_layer = create_pipeline( + data_file=args.test_file, + model=model, + head=qa_head, + 
max_query_length=args.max_query_length, + max_seq_length=args.max_seq_length, + doc_stride=args.doc_stride, + batch_size=args.batch_size, + version_2_with_negative=args.version_2_with_negative, + num_gpus=args.num_gpus, + batches_per_step=args.batches_per_step, + mode="test", + use_data_cache=not args.no_data_cache, ) - logging.info(f"training step per epoch: {train_steps_per_epoch}") - _, _, eval_output, eval_data_layer = create_pipeline( - data_file=args.dev_file, - model=model, - head=qa_head, - loss_fn=squad_loss, - max_query_length=args.max_query_length, - max_seq_length=args.max_seq_length, - doc_stride=args.doc_stride, - batch_size=args.batch_size, - version_2_with_negative=args.version_2_with_negative, - num_gpus=args.num_gpus, - batches_per_step=args.batches_per_step, - mode="dev", - ) - if not args.evaluation_only: + if "train" in args.mode: logging.info(f"steps_per_epoch = {train_steps_per_epoch}") - callback_train = nemo_core.SimpleLossLoggerCallback( + train_callback = nemo_core.SimpleLossLoggerCallback( tensors=[train_loss], - print_func=lambda x: print("Loss: {:.3f}".format(x[0].item())), + print_func=lambda x: logging.info("Loss: {:.3f}".format(x[0].item())), get_tb_values=lambda x: [["loss", x[0]]], - step_freq=args.loss_step_freq, + step_freq=args.train_step_freq, tb_writer=nf.tb_writer, ) - ckpt_callback = nemo_core.CheckpointCallback( folder=nf.checkpoint_dir, epoch_freq=args.save_epoch_freq, step_freq=args.save_step_freq ) - callbacks_eval = nemo_core.EvaluatorCallback( - eval_tensors=eval_output, - user_iter_callback=lambda x, y: eval_iter_callback(x, y), - user_epochs_done_callback=lambda x: eval_epochs_done_callback( - x, - eval_data_layer=eval_data_layer, - do_lower_case=args.do_lower_case, - n_best_size=args.n_best_size, - max_answer_length=args.max_answer_length, - version_2_with_negative=args.version_2_with_negative, - null_score_diff_threshold=args.null_score_diff_threshold, - ), - tb_writer=nf.tb_writer, - eval_step=args.eval_step_freq, - ) + callbacks = [train_callback, ckpt_callback] + if "eval" in args.mode: + eval_callback = nemo_core.EvaluatorCallback( + eval_tensors=eval_output, + user_iter_callback=lambda x, y: eval_iter_callback(x, y), + user_epochs_done_callback=lambda x: eval_epochs_done_callback( + x, + eval_data_layer=eval_data_layer, + do_lower_case=args.do_lower_case, + n_best_size=args.n_best_size, + max_answer_length=args.max_answer_length, + version_2_with_negative=args.version_2_with_negative, + null_score_diff_threshold=args.null_score_diff_threshold, + ), + tb_writer=nf.tb_writer, + eval_step=args.eval_step_freq, + ) + callbacks.append(eval_callback) - lr_policy_fn = get_lr_policy( - args.lr_policy, total_steps=args.num_epochs * train_steps_per_epoch, warmup_ratio=args.lr_warmup_proportion - ) + optimization_params = { + "lr": args.lr, + "weight_decay": args.weight_decay, + } + if args.max_steps < 0: + total_steps = args.num_epochs * train_steps_per_epoch + optimization_params['num_epochs'] = args.num_epochs + else: + total_steps = args.max_steps + optimization_params['max_steps'] = args.max_steps + + lr_policy_fn = get_lr_policy(args.lr_policy, total_steps=total_steps, warmup_ratio=args.lr_warmup_proportion) + + if args.grad_norm_clip >= 0: + optimization_params['grad_norm_clip'] = args.grad_norm_clip nf.train( tensors_to_optimize=[train_loss], - callbacks=[callback_train, ckpt_callback, callbacks_eval], + callbacks=callbacks, lr_policy=lr_policy_fn, - optimizer=args.optimizer_kind, + optimizer=args.optimizer, 
batches_per_step=args.batches_per_step, - optimization_params={"num_epochs": args.num_epochs, "lr": args.lr}, + optimization_params=optimization_params, ) - else: + else: + load_from_folder = None if args.checkpoint_dir is not None: load_from_folder = args.checkpoint_dir - evaluated_tensors = nf.infer(tensors=eval_output, checkpoint_dir=load_from_folder, cache=True) + + evaluated_tensors = nf.infer( + tensors=eval_output, checkpoint_dir=load_from_folder, cache=True, offload_to_cpu=False + ) unique_ids = [] - start_logits = [] - end_logits = [] - for t in evaluated_tensors[2]: - unique_ids.extend(t.tolist()) for t in evaluated_tensors[0]: - start_logits.extend(t.tolist()) - for t in evaluated_tensors[1]: - end_logits.extend(t.tolist()) - - exact_match, f1, all_predictions = eval_data_layer.dataset.evaluate( - unique_ids=unique_ids, - start_logits=start_logits, - end_logits=end_logits, - n_best_size=args.n_best_size, - max_answer_length=args.max_answer_length, - version_2_with_negative=args.version_2_with_negative, - null_score_diff_threshold=args.null_score_diff_threshold, - do_lower_case=args.do_lower_case, - ) + unique_ids.extend(t.tolist()) + if "eval" in args.mode: + start_logits = [] + end_logits = [] + for t in evaluated_tensors[1]: + start_logits.extend(t.tolist()) + for t in evaluated_tensors[2]: + end_logits.extend(t.tolist()) + + exact_match, f1, all_predictions, all_nbest = eval_data_layer.dataset.evaluate( + unique_ids=unique_ids, + start_logits=start_logits, + end_logits=end_logits, + n_best_size=args.n_best_size, + max_answer_length=args.max_answer_length, + version_2_with_negative=args.version_2_with_negative, + null_score_diff_threshold=args.null_score_diff_threshold, + do_lower_case=args.do_lower_case, + ) - logging.info(f"exact_match: {exact_match}, f1: {f1}") + logging.info(f"exact_match: {exact_match}, f1: {f1}") + + elif "test" in args.mode: + logits = [] + for t in evaluated_tensors[1]: + logits.extend(t.tolist()) + start_logits, end_logits = np.split(np.asarray(logits), 2, axis=-1) + (all_predictions, all_nbest, scores_diff) = test_data_layer.dataset.get_predictions( + unique_ids=unique_ids, + start_logits=start_logits, + end_logits=end_logits, + n_best_size=args.n_best_size, + max_answer_length=args.max_answer_length, + version_2_with_negative=args.version_2_with_negative, + null_score_diff_threshold=args.null_score_diff_threshold, + do_lower_case=args.do_lower_case, + ) + if args.output_nbest_file is not None: + with open(args.output_nbest_file, "w") as writer: + writer.write(json.dumps(all_nbest, indent=4) + "\n") if args.output_prediction_file is not None: with open(args.output_prediction_file, "w") as writer: writer.write(json.dumps(all_predictions, indent=4) + "\n") diff --git a/examples/nlp/scripts/convert_iob_format_to_token_classification_format.py b/examples/nlp/scripts/convert_iob_format_to_token_classification_format.py deleted file mode 100644 index b07f60b125b8..000000000000 --- a/examples/nlp/scripts/convert_iob_format_to_token_classification_format.py +++ /dev/null @@ -1,75 +0,0 @@ -# ============================================================================= -# Copyright 2020 NVIDIA. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================= - -import argparse -import os - -from nemo import logging - - -def __convert_data(in_file, out_text, out_labels): - """ - in_file should be in the IOB format, see example here: - https://www.clips.uantwerpen.be/conll2003/ner/. - - After the convertion, the dataset is splitted into 2 files: text.txt - and labels.txt. - Each line of the text.txt file contains text sequences, where words - are separated with spaces. The labels.txt file contains corresponding - labels for each word in text.txt, the labels are separated with spaces. - Each line of the files should follow the format: - [WORD] [SPACE] [WORD] [SPACE] [WORD] (for text.txt) and - [LABEL] [SPACE] [LABEL] [SPACE] [LABEL] (for labels.txt). - - """ - in_file = open(in_file, 'r') - with open(out_text, 'w') as text, open(out_labels, 'w') as labels: - for line in in_file: - if line == '\n': - text.write(line) - labels.write(line) - else: - line = line.split() - text.write(line[0] + ' ') - labels.write(line[-1] + ' ') - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description='Convert data from IOB ' - + 'format to the format compatible with ' - + 'nlp/examples/token_classification.py' - ) - parser.add_argument("--data_dir", required=True, type=str) - args = parser.parse_args() - - for dataset in ['dev.txt', 'train.txt']: - file_path = os.path.join(args.data_dir, dataset) - if not os.path.exists(file_path): - raise FileNotFoundError( - "{file_path} not found in {args.data_dir}" - "For NER, CoNLL-2003 dataset" - "can be obtained at" - "https://github.com/kyzhouhzau/BERT" - "-NER/tree/master/data." - ) - - logging.info(f'Processing {dataset}') - out_text = os.path.join(args.data_dir, 'text_' + dataset) - out_labels = os.path.join(args.data_dir, 'labels_' + dataset) - - __convert_data(file_path, out_text, out_labels) - logging.info(f'Processing of the {dataset} is complete') diff --git a/examples/nlp/scripts/multiwoz/process_multiwoz.py b/examples/nlp/scripts/multiwoz/process_multiwoz.py deleted file mode 100644 index 045964446ce6..000000000000 --- a/examples/nlp/scripts/multiwoz/process_multiwoz.py +++ /dev/null @@ -1,400 +0,0 @@ -7 #!/usr/bin/python - -# ============================================================================= -# Copyright 2020 NVIDIA. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================= - -# ============================================================================= -# Copyright 2019 Salesforce Research and Paweł Budzianowski. -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation the -# rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom -# the Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, -# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR -# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -# THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# ============================================================================= - -""" -Dataset: http://dialogue.mi.eng.cam.ac.uk/index.php/corpus/ - -Code based on: -https://github.com/jasonwu0731/trade-dst -https://github.com/budzianowski/multiwoz -""" - -import argparse -import json -import os -import re -import shutil - -from nemo.collections.nlp.data.datasets.datasets_utils import if_exist - -parser = argparse.ArgumentParser(description='Process MultiWOZ dataset') -parser.add_argument("--data_dir", default='../../data/statetracking/MULTIWOZ2.1', type=str) -parser.add_argument("--out_dir", default='../../data/statetracking/multiwoz', type=str) -args = parser.parse_args() - -if not os.path.exists(args.data_dir): - raise FileNotFoundError(f"{args.data_dir} doesn't exist.") - -DOMAINS = ['restaurant', 'hotel', 'attraction', 'train', 'taxi', 'hospital', 'police'] -PHONE_NUM_TMPL = '\(?(\d{3})\)?[-.\s]?(\d{3})[-.\s]?(\d{4,5})' -POSTCODE_TMPL = ( - '([a-z]{1}[\. ]?[a-z]{1}[\. ]?\d{1,2}[, ]+\d{1}[\. ]?' + '[a-z]{1}[\. 
]?[a-z]{1}|[a-z]{2}\d{2}[a-z]{2})' -) - -REPLACEMENTS = {} -with open('replacements.txt', 'r') as f: - for line in f: - word1, word2 = line.strip().split('\t') - REPLACEMENTS[word1] = word2 -REPLACEMENTS['-'] = ' ' -REPLACEMENTS[';'] = ',' -REPLACEMENTS['/'] = ' and ' - -DONT_CARES = set(['dont care', 'dontcare', "don't care", "do not care"]) - - -def is_ascii(text): - return all(ord(c) < 128 for c in text) - - -def normalize(text): - text = text.lower().strip() - - # hotel domain pfb30 - text = re.sub(r"b&b", "bed and breakfast", text) - text = re.sub(r"b and b", "bed and breakfast", text) - text = re.sub('[\"\<>@\(\)]', '', text) # remove brackets - text = re.sub(u"(\u2018|\u2019)", "'", text) # weird unicode bug - # add space around punctuations - text = re.sub('(\D)([?.,!])', r'\1 \2 ', text) - - clean_tokens = [] - - for token in text.split(): - token = token.strip() - if not token: - continue - if token in REPLACEMENTS: - clean_tokens.append(REPLACEMENTS[token]) - else: - clean_tokens.append(token) - - text = ' '.join(clean_tokens) # remove extra spaces - text = re.sub('(\d) (\d)', r'\1\2', text) # concatenate numbers - - return text - - -def get_goal(idx, log, goals, last_goal): - if idx == 1: # first system's response - active_goals = get_summary_belief_state(log[idx]["metadata"], True) - return active_goals[0] if len(active_goals) != 0 else goals[0] - else: - new_goals = get_new_goal(log[idx - 2]["metadata"], log[idx]["metadata"]) - return last_goal if not new_goals else new_goals[0] - - -def get_summary_belief_state(bstate, get_goal=False): - """Based on the mturk annotations we form multi-domain belief state - TODO: Figure out why this script has hotel-name but jason's script doesn't - (see val_dialogs.json) - """ - summary_bstate, summary_bvalue, active_domain = [], [], [] - for domain in DOMAINS: - domain_active = False - booking = [] - - for slot in sorted(bstate[domain]['book'].keys()): - if slot == 'booked': - booking.append(int(len(bstate[domain]['book']['booked']) != 0)) - else: - if bstate[domain]['book'][slot]: - booking.append(1) - curr_bvalue = [f"{domain}-book {slot.strip().lower()}", normalize(bstate[domain]['book'][slot])] - summary_bvalue.append(curr_bvalue) - else: - booking.append(0) - if domain == 'train': - if 'people' not in bstate[domain]['book']: - booking.append(0) - if 'ticket' not in bstate[domain]['book']: # TODO: possibly elif - booking.append(0) - summary_bstate += booking - - for slot in bstate[domain]['semi']: - slot_enc = [0, 0, 0] # not mentioned, dontcare, filled - if bstate[domain]['semi'][slot] == 'not mentioned': - slot_enc[0] = 1 - elif bstate[domain]['semi'][slot] in DONT_CARES: - slot_enc[1] = 1 - summary_bvalue.append([f"{domain}-{slot.strip().lower()}", "dontcare"]) - elif bstate[domain]['semi'][slot]: - curr_bvalue = [f"{domain}-{slot.strip().lower()}", normalize(bstate[domain]['semi'][slot])] - summary_bvalue.append(curr_bvalue) - if sum(slot_enc) > 0: - domain_active = True - summary_bstate += slot_enc - - if domain_active: # quasi domain-tracker - summary_bstate += [1] - active_domain.append(domain) - else: - summary_bstate += [0] - - assert len(summary_bstate) == 94 - if get_goal: - return active_domain - return summary_bstate, summary_bvalue - - -def get_new_goal(prev_turn, curr_turn): - """ If multiple domains are updated between turns, - return all of them - """ - new_goals = [] - # Sometimes, metadata is an empty dictionary, bug? 
- if not prev_turn or not curr_turn: - return new_goals - - for domain in prev_turn: - if curr_turn[domain] != prev_turn[domain]: - new_goals.append(domain) - return new_goals - - -def get_dialog_act(curr_dialog_acts, act_idx): - """Given system dialogue acts fix automatic delexicalization.""" - acts = [] - if not act_idx in curr_dialog_acts: - return acts - - turn = curr_dialog_acts[act_idx] - - if isinstance(turn, dict): # it's annotated: - for key in turn: - key_acts = turn[key] - key = key.strip().lower() - if key.endswith('request'): - for act in key_acts: - acts.append(act[0].lower()) - elif key.endswith('inform'): - for act in key_acts: - acts.append([act[0].lower(), normalize(act[1])]) - return acts - - -def fix_delex(curr_dialog_acts, act_idx, text): - """Given system dialogue acts fix automatic delexicalization.""" - if not act_idx in curr_dialog_acts: - return text - - turn = curr_dialog_acts[act_idx] - - if isinstance(turn, dict): # it's annotated: - for key in turn: - if 'Attraction' in key: - if 'restaurant_' in text: - text = text.replace("restaurant", "attraction") - if 'hotel_' in text: - text = text.replace("hotel", "attraction") - if 'Hotel' in key: - if 'attraction_' in text: - text = text.replace("attraction", "hotel") - if 'restaurant_' in text: - text = text.replace("restaurant", "hotel") - if 'Restaurant' in key: - if 'attraction_' in text: - text = text.replace("attraction", "restaurant") - if 'hotel_' in text: - text = text.replace("hotel", "restaurant") - - return text - - -def create_data(data_dir): - data = json.load(open(f'{data_dir}/data.json', 'r')) - dialog_acts = json.load(open(f'{data_dir}/dialogue_acts.json', 'r')) - - delex_data = {} - - for dialog_id in data: - dialog = data[dialog_id] - curr_dialog_acts = dialog_acts[dialog_id.strip('.json')] - goals = [key for key in dialog['goal'].keys() if key in DOMAINS and dialog['goal'][key]] - - last_goal, act_idx = '', 1 - for idx, turn in enumerate(dialog['log']): - dialog['log'][idx]['text'] = normalize(turn['text']) - - if idx % 2 == 1: # system's turn - cur_goal = get_goal(idx, dialog['log'], goals, last_goal) - last_goal = cur_goal - - dialog['log'][idx - 1]['domain'] = cur_goal # human's domain - dialog['log'][idx]['dialogue_acts'] = get_dialog_act(curr_dialog_acts, str(act_idx)) - act_idx += 1 - - dialog['log'][idx]['text'] = fix_delex(curr_dialog_acts, str(act_idx), dialog['log'][idx]['text']) - - delex_data[dialog_id] = dialog - return delex_data - - -def analyze_dialogue(dialog, max_length): - """Cleaning procedure for all kinds of errors in text and annotation.""" - if len(dialog['log']) % 2 == 1: - print('Odd number of turns. Wrong dialogue.') - return None - - clean_dialog = {} - clean_dialog['goal'] = dialog['goal'] # for now we just copy the goal - usr_turns, sys_turns = [], [] - - for idx in range(len(dialog['log'])): - text = dialog['log'][idx]['text'] - if len(text.split()) > max_length or not is_ascii(text): - return None # sequence corrupted. 
discard - - if idx % 2 == 0: # usr turn - usr_turns.append(dialog['log'][idx]) - else: # sys turn - belief_summary, belief_value_summary = get_summary_belief_state(dialog['log'][idx]['metadata']) - - dialog['log'][idx]['belief_summary'] = str(belief_summary) - dialog['log'][idx]['belief_value_summary'] = belief_value_summary - sys_turns.append(dialog['log'][idx]) - - clean_dialog['usr_log'] = usr_turns - clean_dialog['sys_log'] = sys_turns - - return clean_dialog - - -def get_dialog(dialog, max_length=50): - """Extract a dialogue from the file""" - dialog = analyze_dialogue(dialog, max_length) - if dialog is None: - return None - - dialogs = [] - for idx in range(len(dialog['usr_log'])): - dialogs.append( - { - 'usr': dialog['usr_log'][idx]['text'], - 'sys': dialog['sys_log'][idx]['text'], - 'sys_a': dialog['sys_log'][idx]['dialogue_acts'], - 'domain': dialog['usr_log'][idx]['domain'], - 'bvs': dialog['sys_log'][idx]['belief_value_summary'], - } - ) - - return dialogs - - -def partition_data(data, infold, outfold): - """Partition the data into train, valid, and test sets - based on the list of val and test specified in the dataset. - """ - if if_exist( - outfold, ['trainListFile.json', 'val_dialogs.json', 'test_dialogs.json', 'train_dialogs.json', 'ontology.json'] - ): - print(f'Data is already processed and stored at {outfold}') - return - os.makedirs(outfold, exist_ok=True) - shutil.copyfile(f'{infold}/ontology.json', f'{outfold}/ontology.json') - - with open(f'{infold}/testListFile.json', 'r') as fin: - test_files = [line.strip() for line in fin.readlines()] - - with open(f'{infold}/valListFile.json', 'r') as fin: - val_files = [line.strip() for line in fin.readlines()] - - train_list_files = open(f'{outfold}/trainListFile.json', 'w') - - train_dialogs, val_dialogs, test_dialogs = [], [], [] - count_train, count_val, count_test = 0, 0, 0 - - for dialog_id in data: - dialog = data[dialog_id] - domains = [key for key in dialog['goal'].keys() if key in DOMAINS and dialog['goal'][key]] - - dial = get_dialog(dialog) - if dial: - dialogue = {} - dialogue['dialog_idx'] = dialog_id - dialogue['domains'] = list(set(domains)) - last_bs = [] - dialogue['dialog'] = [] - - for idx, turn in enumerate(dial): - turn_dl = { - 'sys_transcript': dial[idx - 1]['sys'] if idx > 0 else "", - 'turn_idx': idx, - 'transcript': turn['usr'], - 'sys_acts': dial[idx - 1]['sys_a'] if idx > 0 else [], - 'domain': turn['domain'], - } - turn_dl['belief_state'] = [{"slots": [s], "act": "inform"} for s in turn['bvs']] - turn_dl['turn_label'] = [bs["slots"][0] for bs in turn_dl['belief_state'] if bs not in last_bs] - last_bs = turn_dl['belief_state'] - dialogue['dialog'].append(turn_dl) - - if dialog_id in test_files: - test_dialogs.append(dialogue) - count_test += 1 - elif dialog_id in val_files: - val_dialogs.append(dialogue) - count_val += 1 - else: - train_list_files.write(dialog_id + '\n') - train_dialogs.append(dialogue) - count_train += 1 - - print(f"Dialogs: {count_train} train, {count_val} val, {count_test} test.") - - # save all dialogues - with open(f'{outfold}/val_dialogs.json', 'w') as fout: - json.dump(val_dialogs, fout, indent=4) - - with open(f'{outfold}/test_dialogs.json', 'w') as fout: - json.dump(test_dialogs, fout, indent=4) - - with open(f'{outfold}/train_dialogs.json', 'w') as fout: - json.dump(train_dialogs, fout, indent=4) - - train_list_files.close() - - -def process_woz(): - delex_data = create_data(args.data_dir) - partition_data(delex_data, args.data_dir, args.out_dir) - - -process_woz() diff 
--git a/examples/nlp/text_classification/data/import_datasets.py b/examples/nlp/text_classification/data/import_datasets.py new file mode 100644 index 000000000000..fc5f76e3db11 --- /dev/null +++ b/examples/nlp/text_classification/data/import_datasets.py @@ -0,0 +1,255 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +import argparse +import csv +import glob +import json +import os +from os.path import exists + +import tqdm + +from nemo import logging +from nemo.collections.nlp.data.datasets.datasets_utils.data_preprocessing import DATABASE_EXISTS_TMP, if_exist + + +def process_imdb(infold, outfold, uncased, modes=['train', 'test']): + if not os.path.exists(infold): + link = 'www.kaggle.com/iarunava/imdb-movie-reviews-dataset' + raise ValueError(f'Data not found at {infold}. ' f'Please download IMDB from {link}.') + + logging.info(f'Processing IMDB dataset and store at {outfold}') + os.makedirs(outfold, exist_ok=True) + + outfiles = {} + for mode in modes: + outfiles[mode] = open(os.path.join(outfold, mode + '.tsv'), 'w') + outfiles[mode].write('sentence\tlabel\n') + for sent in ['neg', 'pos']: + if sent == 'neg': + label = 0 + else: + label = 1 + files = glob.glob(f'{infold}/{mode}/{sent}/*.txt') + for file in files: + with open(file, 'r') as f: + review = f.read().strip() + if uncased: + review = review.lower() + review = review.replace("
", "") + outfiles[mode].write(f'{review}\t{label}\n') + for mode in modes: + outfiles[mode].close() + + +def process_chemprot(source_dir, target_dir, uncased, modes=['train', 'test', 'dev']): + if not os.path.exists(source_dir): + link = 'https://github.com/arwhirang/recursive_chemprot/tree/master/Demo/tree_LSTM/data' + raise ValueError(f'Data not found at {source_dir}. ' f'Please download ChemProt from {link}.') + + logging.info(f'Processing Chemprot dataset and store at {target_dir}') + os.makedirs(target_dir, exist_ok=True) + + naming_map = {'train': 'trainingPosit_chem', 'test': 'testPosit_chem', 'dev': 'developPosit_chem'} + + def _read_tsv(input_file, quotechar=None): + """Reads a tab separated value file.""" + with open(input_file, "r") as f: + reader = csv.reader(f, delimiter="\t", quotechar=quotechar) + lines = [] + for line in reader: + lines.append(line) + return lines + + outfiles = {} + label_mapping = {} + out_label_mapping = open(os.path.join(target_dir, 'label_mapping.tsv'), 'w') + for mode in modes: + outfiles[mode] = open(os.path.join(target_dir, mode + '.tsv'), 'w') + outfiles[mode].write('sentence\tlabel\n') + input_file = os.path.join(source_dir, naming_map[mode]) + lines = _read_tsv(input_file) + for line in lines: + text = line[1] + label = line[2] + if label == "True": + label = line[3] + if uncased: + text = text.lower() + if label not in label_mapping: + out_label_mapping.write(f'{label}\t{len(label_mapping)}\n') + label_mapping[label] = len(label_mapping) + label = label_mapping[label] + outfiles[mode].write(f'{text}\t{label}\n') + for mode in modes: + outfiles[mode].close() + out_label_mapping.close() + + +def process_thucnews(infold, outfold): + modes = ['train', 'test'] + train_size = 0.8 + if not os.path.exists(infold): + link = 'thuctc.thunlp.org/' + raise ValueError(f'Data not found at {infold}. ' f'Please download THUCNews from {link}.') + + logging.info(f'Processing THUCNews dataset and store at {outfold}') + os.makedirs(outfold, exist_ok=True) + + outfiles = {} + for mode in modes: + outfiles[mode] = open(os.path.join(outfold, mode + '.tsv'), 'a+', encoding='utf-8') + outfiles[mode].write('sentence\tlabel\n') + categories = ['体育', '娱乐', '家居', '彩票', '房产', '教育', '时尚', '时政', '星座', '游戏', '社会', '科技', '股票', '财经'] + for category in categories: + label = categories.index(category) + category_files = glob.glob(f'{infold}/{category}/*.txt') + test_num = int(len(category_files) * (1 - train_size)) + test_files = category_files[:test_num] + train_files = category_files[test_num:] + + for mode in modes: + logging.info(f'Processing {mode} data of the category {category}') + if mode == 'test': + files = test_files + else: + files = train_files + + if len(files) == 0: + logging.info(f'Skipping category {category} for {mode} mode') + continue + + for file in tqdm.tqdm(files): + with open(file, 'r', encoding='utf-8') as f: + news = f.read().strip().replace('\r', '') + news = news.replace('\n', '').replace('\t', ' ') + outfiles[mode].write(f'{news}\t{label}\n') + for mode in modes: + outfiles[mode].close() + + +def process_nlu(filename, outfold, uncased, dataset_name, modes=['train', 'test']): + """ Dataset has to be of: + - ubuntu + - chat + - web + """ + + if not os.path.exists(filename): + link = 'https://github.com/sebischair/NLU-Evaluation-Corpora' + raise ValueError(f'Data not found at {filename}. 
' f'Please download IMDB from {link}.') + + if dataset_name == 'nlu-ubuntu': + INTENT = {'makeupdate': 1, 'setupprinter': 2, 'shutdowncomputer': 3, 'softwarerecommendation': 4, 'none': 0} + elif dataset_name == 'nlu-chat': + INTENT = {'departuretime': 0, 'findconnection': 1} + elif dataset_name == 'nlu-web': + INTENT = { + 'changepassword': 1, + 'deleteaccount': 2, + 'downloadvideo': 3, + 'exportdata': 4, + 'filterspam': 5, + 'findalternative': 6, + 'syncaccounts': 7, + 'none': 0, + } + else: + raise ValueError(f'{dataset_name}: Invalid dataset name') + + if if_exist(outfold, [f'{mode}.tsv' for mode in modes]): + logging.info(DATABASE_EXISTS_TMP.format(dataset_name.upper(), outfold)) + return outfold + logging.info(f'Processing data and store at {outfold}') + + os.makedirs(outfold, exist_ok=True) + + outfiles = {} + for mode in modes: + outfiles[mode] = open(os.path.join(outfold, mode + '.tsv'), 'w') + outfiles[mode].write('sentence\tlabel\n') + + with open(filename, 'r') as f: + data = json.load(f) + + for obj in data['sentences']: + sentence = obj['text'].strip() + if uncased: + sentence = sentence.lower() + intent = obj['intent'].lower().replace(' ', '') + label = INTENT[intent] + txt = f'{sentence}\t{label}\n' + if obj['training']: + outfiles['train'].write(txt) + else: + outfiles['test'].write(txt) + for mode in modes: + outfiles[mode].close() + + +if __name__ == "__main__": + # Parse the command-line arguments. + parser = argparse.ArgumentParser(description="Process and convert datasets into NeMo\'s format.") + parser.add_argument( + "--dataset_name", + required=True, + type=str, + choices=['sst-2', 'imdb', 'thucnews', 'nlu-chat', 'nlu-ubuntu', 'nlu-web', 'chemprot'], + ) + parser.add_argument( + "--source_data_dir", required=True, type=str, help='The path to the folder containing the dataset files.' + ) + parser.add_argument("--target_data_dir", required=True, type=str) + parser.add_argument("--do_lower_case", action='store_true') + parser.add_argument( + "--ignore_prev_intent", + action='store_true', + help='ignores previous intent while importing datasets in jarvis\'s format', + ) + args = parser.parse_args() + + dataset_name = args.dataset_name + do_lower_case = args.do_lower_case + source_dir = args.source_data_dir + target_dir = args.target_data_dir + + if not exists(source_dir): + raise FileNotFoundError(f"{source_dir} does not exist.") + + if dataset_name == 'sst-2': + logging.info("sst-2 is compatible with NeMo's format and no need for conversion.") + elif dataset_name == 'imdb': + process_imdb(source_dir, target_dir, do_lower_case) + elif dataset_name == 'thucnews': + process_thucnews(source_dir, target_dir) + elif dataset_name.startswith('nlu-'): + if dataset_name == 'nlu-chat': + infile = f'{source_dir}/ChatbotCorpus.json' + elif dataset_name == 'nlu-ubuntu': + infile = f'{source_dir}/AskUbuntuCorpus.json' + elif dataset_name == 'nlu-web': + infile = f'{source_dir}/WebApplicationsCorpus.json' + process_nlu(filename=infile, outfold=target_dir, uncased=do_lower_case, dataset_name=dataset_name) + elif dataset_name == "chemprot": + process_chemprot(source_dir, target_dir, do_lower_case) + else: + raise ValueError( + f'Dataset {dataset_name} is not supported.' + + "Please make sure that you build the preprocessing process for it. " + + "NeMo's format assumes that a data file has a header and each line of the file follows " + + "the format: text [TAB] label. Label is assumed to be an integer." 
+ ) diff --git a/examples/nlp/text_classification/text_classification_with_bert.ipynb b/examples/nlp/text_classification/text_classification_with_bert.ipynb new file mode 100644 index 000000000000..eb957c8ad506 --- /dev/null +++ b/examples/nlp/text_classification/text_classification_with_bert.ipynb @@ -0,0 +1,877 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import nemo\n", + "import nemo.collections.nlp as nemo_nlp\n", + "from nemo.collections.nlp.data.datasets import BertTextClassificationDataset\n", + "from nemo.collections.nlp.nm.data_layers.text_classification_datalayer import BertTextClassificationDataLayer\n", + "from nemo.collections.nlp.nm.trainables import SequenceClassifier\n", + "\n", + "from nemo.backends.pytorch.common import CrossEntropyLossNM\n", + "from nemo.utils.lr_policies import get_lr_policy\n", + "from nemo.collections.nlp.callbacks.text_classification_callback import eval_iter_callback, eval_epochs_done_callback\n", + "\n", + "import os\n", + "import json\n", + "import math\n", + "import numpy as np\n", + "import pandas as pd\n", + "pd.options.display.max_colwidth = -1\n", + "\n", + "import matplotlib.pyplot as plt\n", + "from sklearn.manifold import TSNE\n", + "%matplotlib inline\n", + "\n", + "import torch" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data Explore\n", + "\n", + "The SST-2 dataset https://nlp.stanford.edu/sentiment/index.html is a standard benchmark for sentence classification and is part of the GLUE Benchmark: https://gluebenchmark.com/tasks. Please download and unzip the SST-2 dataset from GLUE." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "WORK_DIR = 'logs'\n", + "DATA_DIR = 'data/SST-2'\n", + "\n", + "# To use mixed precision, set AMP_OPTIMIZATION_LEVEL to 'O1' or 'O2',\n", + "# to train without mixed precision, set it to 'O0'.\n", + "AMP_OPTIMIZATION_LEVEL = 'O1'\n", + "PRETRAINED_MODEL_NAME = 'bert-base-uncased'\n", + "MAX_SEQ_LEN = 64 # we will pad with 0's shorter sentences and truncate longer\n", + "BATCH_SIZE = 256 # 64 for 'bert-large-uncased'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv(DATA_DIR + '/train.tsv', sep='\\t')\n", + "test_df = pd.read_csv(DATA_DIR + '/test.tsv', sep='\\t')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "test_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The dataset comes with a train file (labeled) and a test file (not labeled). We will use part of the train file for model validation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Split train to train and val and save to disk\n", + "np.random.seed(123)\n", + "train_mask = np.random.rand((len(df))) < .8\n", + "train_df = df[train_mask]\n", + "val_df = df[~train_mask]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In order to take advantage of NeMo's pre-built sentence classification data layer, the data should be formatted as \"sentence\\tlabel\" (sentence tab label)." 
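Editor's note (illustrative sketch, not part of the notebook): a correctly formatted split file therefore starts with a header row followed by one tab-separated example per line, for instance
sentence	label
a funny and touching film	1
the plot never comes together	0
where the two sentences are invented placeholders and 1/0 denote positive/negative sentiment.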
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# We will add a label column with all 0's (but they will not be used for anything).\n", + "test_df['label'] = 0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "test_df = test_df[['sentence', 'label']]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "test_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Save new train, val, and test to disk\n", + "SPLIT_DATA_DIR = os.path.join(DATA_DIR, 'split')\n", + "\n", + "os.makedirs(SPLIT_DATA_DIR, exist_ok=True)\n", + "\n", + "train_df.to_csv(os.path.join(SPLIT_DATA_DIR, 'train.tsv'), sep='\\t', index=False)\n", + "val_df.to_csv(os.path.join(SPLIT_DATA_DIR, 'eval.tsv'), sep='\\t', index=False)\n", + "test_df.to_csv(os.path.join(SPLIT_DATA_DIR, 'test.tsv'), sep='\\t', index=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Neural Modules\n", + "\n", + "In NeMo, everything is a Neural Module. Neural modules abstract data and neural network architectures. Where a deep learning framework like PyTorch or Tensorflow is used to combine neural network layers to create a neural network. \n", + "NeMo is used to combine data and neural networks to create AI applications.\n", + "The Neural Module Factory will then manage the neural modules, taking care to flow data through the neural modules, and is also responsible for training (including mixed precision and distributed), logging, and inference." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# instantiate the neural module factory\n", + "nf = nemo.core.NeuralModuleFactory(log_dir=WORK_DIR,\n", + " create_tb_writer=True,\n", + " add_time_to_log_dir=False,\n", + " optimization_level=AMP_OPTIMIZATION_LEVEL)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Pre-trained models will be automatically downloaded and cached." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Pre-trained BERT\n", + "bert = nemo_nlp.nm.trainables.huggingface.BERT(pretrained_model_name=PRETRAINED_MODEL_NAME)\n", + "tokenizer = nemo_nlp.data.NemoBertTokenizer(PRETRAINED_MODEL_NAME)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note here that the BERT models we are working with are massive. This gives our models a large capacity for learning that is needed to understand the nuance and complexity of natural language." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(f'{PRETRAINED_MODEL_NAME} has {bert.num_weights} weights')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here we define and instantiate the feed forward network that takes as input our BERT embeddings. This network will be used to output the sentence classifications." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# mlp classifier\n", + "bert_hidden_size = bert.hidden_size\n", + "\n", + "mlp = SequenceClassifier(hidden_size=bert_hidden_size, \n", + " num_classes=2,\n", + " num_layers=2,\n", + " log_softmax=False,\n", + " dropout=0.1)\n", + "\n", + "loss = CrossEntropyLossNM()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Compared to the BERT model, the MLP is tiny.\n", + "print(f'MLP has {mlp.num_weights} weights')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Pipelines\n", + "\n", + "Pipelines are used to define how data will flow the different neural networks. In this case, our data will flow through the BERT network and then the MLP network.\n", + "\n", + "We also have different pipelines for training, validation, and inference data. \n", + "\n", + "For training data, we want it to be used for optimization so it must be shuffled and we also need to compute the loss.\n", + "\n", + "For validation data, we won't use it for optimization but we want to know the loss.\n", + "\n", + "And for inference data, we only want the final predictions coming from the model." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data Layers" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can gain a lot of efficiency by saving the tokenized data to disk. For future model runs we then don't need to tokenize every time." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "USE_CACHE = True\n", + "\n", + "train_data = BertTextClassificationDataLayer(input_file=os.path.join(SPLIT_DATA_DIR, 'train.tsv'),\n", + " tokenizer=tokenizer,\n", + " max_seq_length=MAX_SEQ_LEN,\n", + " shuffle=True,\n", + " batch_size=BATCH_SIZE,\n", + " use_cache=USE_CACHE)\n", + "\n", + "val_data = BertTextClassificationDataLayer(input_file=os.path.join(SPLIT_DATA_DIR, 'eval.tsv'),\n", + " tokenizer=tokenizer,\n", + " max_seq_length=MAX_SEQ_LEN,\n", + " batch_size=BATCH_SIZE,\n", + " use_cache=USE_CACHE)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "train_input, train_token_types, train_attn_mask, train_labels = train_data()\n", + "val_input, val_token_types, val_attn_mask, val_labels = val_data()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## BERT Embeddings" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "train_embeddings = bert(input_ids=train_input,\n", + " token_type_ids=train_token_types,\n", + " attention_mask=train_attn_mask)\n", + "val_embeddings = bert(input_ids=val_input,\n", + " token_type_ids=val_token_types,\n", + " attention_mask=val_attn_mask)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Inspect BERT Embeddings\n", + "\n", + "If we want to inspect the data as it flows through our neural factory we can use the .infer method. This method will give us the tensors without performing any optimization." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "val_input_tensors = nf.infer(tensors=[val_input])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(val_input_tensors[0][0][0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "val_embeddings_tensors = nf.infer(tensors=[val_embeddings])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# each word is embedded into bert_hidden_size space\n", + "# shape: BATCH_SIZE * MAX_SEQ_LEN * bert_hidden_size\n", + "val_embeddings_tensors[0][0].shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(val_embeddings_tensors[0][0][1][:, 0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Understanding and Visualizing BERT Embeddings\n", + "\n", + "We are going to look at the BERT embeddings for the words (1-word sentences) in \"SPLIT_DATA_DIR/positive_negative.tsv\". Since the BERT embeddings are 768 dimensional for BERT base and 1024 dimensional for BERT large, we'll first apply TSNE and reduce the embeddings to two dimensions." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "spectrum_words = ['abysmal', 'apalling', 'dreadful', 'awful', 'terrible',\n", + " 'very bad', 'really bad', 'rubbish', 'unsatisfactory',\n", + " 'bad', 'poor', 'great', 'really good', 'very good', 'awesome'\n", + " 'fantastic', 'superb', 'brilliant', 'incredible', 'excellent'\n", + " 'outstanding', 'perfect']\n", + "\n", + "spectrum_file = os.path.join(SPLIT_DATA_DIR, 'positive_negative.tsv')\n", + "with open(spectrum_file, 'w+') as f:\n", + " f.write('sentence\\tlabel')\n", + " for word in spectrum_words:\n", + " f.write('\\n' + word + '\\t0')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "spectrum_df = pd.read_csv(spectrum_file, delimiter='\\t')\n", + "print(spectrum_df.head())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# positive negative spectrum\n", + "spectrum_data = BertTextClassificationDataLayer(input_file=spectrum_file,\n", + " tokenizer=tokenizer,\n", + " max_seq_length=MAX_SEQ_LEN,\n", + " batch_size=BATCH_SIZE)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "spectrum_input, spectrum_token_types, spectrum_attn_mask, spectrum_labels = spectrum_data()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "spectrum_embeddings = bert(input_ids=spectrum_input,\n", + " token_type_ids=spectrum_token_types,\n", + " attention_mask=spectrum_attn_mask)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "spectrum_embeddings_tensors = nf.infer(tensors=[spectrum_embeddings])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "spectrum_embeddings_tensors[0][0].shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(100,100))\n", + 
"plt.imshow(spectrum_embeddings_tensors[0][0][:,0,:].numpy())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "spectrum_activations = spectrum_embeddings_tensors[0][0][:,0,:].numpy()\n", + "tsne_spectrum = TSNE(n_components=2, perplexity=10, verbose=1, learning_rate=2,\n", + " random_state=123).fit_transform(spectrum_activations)\n", + "\n", + "fig = plt.figure(figsize=(10,10))\n", + "plt.plot(tsne_spectrum[0:11, 0], tsne_spectrum[0:11, 1], 'rx')\n", + "plt.plot(tsne_spectrum[11:, 0], tsne_spectrum[11:, 1], 'bo')\n", + "for (x,y, label) in zip(tsne_spectrum[0:, 0], tsne_spectrum[0:, 1], spectrum_df.sentence.values.tolist() ):\n", + " plt.annotate(label, # this is the text\n", + " (x,y), # this is the point to label\n", + " textcoords=\"offset points\", # how to position the text\n", + " xytext=(0,10), # distance from text to points (x,y)\n", + " ha='center') # horizontal alignment can be left, right or center" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Training Pipeline \n", + "\n", + "In order to optimize our network, we need to pass the embeddings through the MLP network and then compute the loss." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "train_logits = mlp(hidden_states=train_embeddings)\n", + "val_logits = mlp(hidden_states=val_embeddings)\n", + "\n", + "train_loss = loss(logits=train_logits, labels=train_labels)\n", + "val_loss = loss(logits=val_logits, labels=val_labels)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Callbacks\n", + "\n", + "Callbacks are used to record and log metrics and save checkpoints for the training and evaluation. 
We use callbacks to print to screen and also to tensorboard.\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "NUM_EPOCHS = 3\n", + "NUM_GPUS = 1\n", + "LEARNING_RATE = 5e-5\n", + "OPTIMIZER = 'adam'\n", + "\n", + "train_data_size = len(train_data)\n", + "steps_per_epoch = math.ceil(train_data_size / (BATCH_SIZE * NUM_GPUS))\n", + "\n", + "train_callback = nemo.core.SimpleLossLoggerCallback(tensors=[train_loss, train_logits],\n", + " print_func=lambda x:nemo.logging.info(f'Train loss: {str(np.round(x[0].item(), 3))}'),\n", + " tb_writer=nf.tb_writer,\n", + " get_tb_values=lambda x: [[\"train_loss\", x[0]]],\n", + " step_freq=steps_per_epoch)\n", + "\n", + "eval_callback = nemo.core.EvaluatorCallback(eval_tensors=[val_logits, val_labels],\n", + " user_iter_callback=lambda x, y: eval_iter_callback(x, y, val_data),\n", + " user_epochs_done_callback=lambda x:\n", + " eval_epochs_done_callback(x, f'{nf.work_dir}/graphs'),\n", + " tb_writer=nf.tb_writer,\n", + " eval_step=steps_per_epoch)\n", + "\n", + "# Create callback to save checkpoints\n", + "ckpt_callback = nemo.core.CheckpointCallback(folder=nf.checkpoint_dir,\n", + " epoch_freq=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "lr_policy_fn = get_lr_policy('WarmupAnnealing',\n", + " total_steps=NUM_EPOCHS * steps_per_epoch,\n", + " warmup_ratio=0.1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "nf.train(tensors_to_optimize=[train_loss],\n", + " callbacks=[train_callback, eval_callback, ckpt_callback],\n", + " lr_policy=lr_policy_fn,\n", + " optimizer=OPTIMIZER,\n", + " optimization_params={'num_epochs': NUM_EPOCHS, 'lr': LEARNING_RATE})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Multi-Gpu Training\n", + "\n", + "RESTART KERNEL BEFORE RUNNING THE MULTI-GPU TRAINING" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "num_gpus = 4\n", + "!python -m torch.distributed.launch --nproc_per_node=$NUM_GPUS text_classification_with_bert.py \\\n", + "--pretrained_model_name $PRETRAINED_MODEL_NAME \\\n", + "--data_dir $SPLIT_DATA_DIR \\\n", + "--train_file_prefix 'train' \\\n", + "--eval_file_prefix 'eval' \\\n", + "--use_cache \\\n", + "--batch_size 64 \\\n", + "--max_seq_length 64 \\\n", + "--num_gpus $NUM_GPUS \\\n", + "--num_epochs $NUM_EPOCHS \\\n", + "--amp_opt_level $AMP_OPTIMIZATION_LEVEL \\\n", + "--work_dir $WORK_DIR" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Inference Pipeline\n", + "\n", + "For inference we instantiate the same neural modules but now we will be using the checkpoints that we just learned." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "test_data = BertTextClassificationDataLayer(input_file=os.path.join(SPLIT_DATA_DIR, 'test.tsv'),\n", + " tokenizer=tokenizer,\n", + " max_seq_length=MAX_SEQ_LEN,\n", + " batch_size=BATCH_SIZE)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "test_input, test_token_types, test_attn_mask, _ = test_data()\n", + "test_embeddings = bert(input_ids=test_input,\n", + " token_type_ids=test_token_types,\n", + " attention_mask=test_attn_mask)\n", + "test_logits = mlp(hidden_states=test_embeddings)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "test_logits_tensors = nf.infer(tensors=[test_logits])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "test_probs = torch.nn.functional.softmax(torch.cat(test_logits_tensors[0])).numpy()[:, 1] " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "test_df = pd.read_csv(os.path.join(SPLIT_DATA_DIR, 'test.tsv'), sep='\\t')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "test_df['prob'] = test_probs \n", + "inference_file = os.path.join(SPLIT_DATA_DIR, 'test_inference.tsv')\n", + "test_df.to_csv(inference_file, sep='\\t', index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def sample_classification(data_path):\n", + " df = pd.read_csv(data_path, sep='\\t')\n", + " sample = df.sample()\n", + " sentence = sample.sentence.values[0]\n", + " prob = sample.prob.values[0]\n", + " result = f'{sentence} | {prob}'\n", + " return result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "num_samples = 10\n", + "for _ in range(num_samples):\n", + " print(sample_classification(inference_file))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Inference Results:\n", + "the film is just a big , gorgeous , mind-blowing , breath-taking mess . | 0.2738656\n", + "\n", + "a sensual performance from abbass buoys the flimsy story , but her inner journey is largely unexplored and we 're left wondering about this exotic-looking woman whose emotional depths are only hinted at . 
| 0.48260054" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Single sentence classification" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def classify_sentence(nf, tokenizer, bert, mlp, sentence):\n", + " sentence = sentence.lower()\n", + " tmp_file = \"/tmp/tmp_sentence.tsv\"\n", + " with open(tmp_file, 'w+') as tmp_tsv:\n", + " header = 'sentence\\tlabel\\n'\n", + " line = sentence + '\\t0\\n'\n", + " tmp_tsv.writelines([header, line])\n", + "\n", + " tmp_data = BertTextClassificationDataLayer(input_file=tmp_file,\n", + " tokenizer=tokenizer,\n", + " max_seq_length=128,\n", + " batch_size=1)\n", + " \n", + " tmp_input, tmp_token_types, tmp_attn_mask, _ = tmp_data()\n", + " tmp_embeddings = bert(input_ids=tmp_input,\n", + " token_type_ids=tmp_token_types,\n", + " attention_mask=tmp_attn_mask)\n", + " tmp_logits = mlp(hidden_states=tmp_embeddings)\n", + " tmp_logits_tensors = nf.infer(tensors=[tmp_logits, tmp_embeddings])\n", + " tmp_probs = torch.nn.functional.softmax(torch.cat(tmp_logits_tensors[0])).numpy()[:, 1] \n", + " print(f'{sentence} | {tmp_probs[0]}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sentences = ['point break is the best movie of all time',\n", + " 'the movie was a wonderful exercise in understanding the struggles of native americans',\n", + " 'the performance of diego luna had me excited and annoyed at the same time',\n", + " 'matt damon is the only good thing about this film']\n", + "\n", + "for sentence in sentences:\n", + " classify_sentence(nf, tokenizer, bert, mlp, sentence)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Understanding and Visualizing BERT Embeddings\n", + "\n", + "Now that we've fine-tuned our BERT model, let's see if the word embeddings have changed." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "spectrum_embeddings = bert(input_ids=spectrum_input,\n", + " token_type_ids=spectrum_token_types,\n", + " attention_mask=spectrum_attn_mask)\n", + "\n", + "spectrum_embeddings_tensors = nf.infer(tensors=[spectrum_embeddings])\n", + "\n", + "plt.figure(figsize=(100,100))\n", + "plt.imshow(spectrum_embeddings_tensors[0][0][:,0,:].numpy())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "spectrum_activations = spectrum_embeddings_tensors[0][0][:,0,:].numpy()\n", + "tsne_spectrum = TSNE(n_components=2, perplexity=10, verbose=1, learning_rate=2,\n", + " random_state=123).fit_transform(spectrum_activations)\n", + "\n", + "fig = plt.figure(figsize=(10,10))\n", + "plt.plot(tsne_spectrum[0:11, 0], tsne_spectrum[0:11, 1], 'rx')\n", + "plt.plot(tsne_spectrum[11:, 0], tsne_spectrum[11:, 1], 'bo')\n", + "for (x,y, label) in zip(tsne_spectrum[0:, 0], tsne_spectrum[0:, 1], spectrum_df.sentence.values.tolist() ):\n", + " plt.annotate(label, # this is the text\n", + " (x,y), # this is the point to label\n", + " textcoords=\"offset points\", # how to position the text\n", + " xytext=(0,10), # distance from text to points (x,y)\n", + " ha='center') # horizontal alignment can be left, right or center" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + }, + "pycharm": { + "stem_cell": { + "cell_type": "raw", + "metadata": { + "collapsed": false + }, + "source": [] + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/examples/nlp/text_classification/text_classification_with_bert.py b/examples/nlp/text_classification/text_classification_with_bert.py index 62048e5b4945..4f4aeaf58b6b 100644 --- a/examples/nlp/text_classification/text_classification_with_bert.py +++ b/examples/nlp/text_classification/text_classification_with_bert.py @@ -17,108 +17,135 @@ import argparse import math -import numpy as np -from transformers import BertTokenizer - -import nemo.collections.nlp.nm.data_layers.text_classification_datalayer -import nemo.collections.nlp.nm.trainables.common.sequence_classification_nm +import nemo +import nemo.collections.nlp as nemo_nlp from nemo import logging from nemo.collections.nlp.callbacks.text_classification_callback import eval_epochs_done_callback, eval_iter_callback -from nemo.collections.nlp.data.datasets.text_classification_dataset import SentenceClassificationDataDesc +from nemo.collections.nlp.data.datasets import TextClassificationDataDesc from nemo.utils.lr_policies import get_lr_policy # Parsing arguments parser = argparse.ArgumentParser(description='Sentence classification with pretrained BERT') -parser.add_argument("--local_rank", default=None, type=int) +parser.add_argument("--work_dir", default='outputs', type=str) +parser.add_argument("--data_dir", required=True, type=str) +parser.add_argument( + '--pretrained_model_name', + default='roberta-base', + type=str, + help='Name of the pre-trained model', + choices=nemo_nlp.nm.trainables.get_pretrained_lm_models_list(), 
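+    # The choices above come from nemo_nlp.nm.trainables.get_pretrained_lm_models_list(), which
+    # lists the supported BERT/RoBERTa/ALBERT/Megatron-BERT checkpoints; 'roberta-base' is only
+    # the default value, not the only option.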
+) +parser.add_argument("--bert_checkpoint", default=None, type=str) +parser.add_argument("--bert_config", default=None, type=str, help="Path to bert config file in json format") +parser.add_argument( + "--tokenizer", + default="nemobert", + type=str, + choices=["nemobert", "sentencepiece"], + help="tokenizer to use, only relevant when using custom pretrained checkpoint.", +) +parser.add_argument("--vocab_file", default=None, help="Path to the vocab file.") +parser.add_argument( + "--tokenizer_model", + default=None, + type=str, + help="Path to pretrained tokenizer model, only used if --tokenizer is sentencepiece", +) +parser.add_argument( + "--do_lower_case", + action='store_true', + help="Whether to lower case the input text. True for uncased models, False for cased models. " + + "For tokenizer only applicable when tokenizer is build with vocab file.", +) parser.add_argument("--batch_size", default=32, type=int) parser.add_argument("--max_seq_length", default=36, type=int) parser.add_argument("--num_gpus", default=1, type=int) +parser.add_argument("--num_output_layers", default=1, type=int) parser.add_argument("--num_epochs", default=10, type=int) parser.add_argument("--num_train_samples", default=-1, type=int) parser.add_argument("--num_eval_samples", default=-1, type=int) +parser.add_argument("--optimizer_kind", default="adam", type=str) parser.add_argument("--lr_warmup_proportion", default=0.1, type=float) parser.add_argument("--lr", default=2e-5, type=float) parser.add_argument("--lr_policy", default="WarmupAnnealing", type=str) +parser.add_argument("--amp_opt_level", default="O0", type=str, choices=["O0", "O1", "O2"]) parser.add_argument("--weight_decay", default=0.01, type=float) parser.add_argument("--fc_dropout", default=0.1, type=float) -parser.add_argument("--pretrained_bert_model", default="bert-base-uncased", type=str) -parser.add_argument("--bert_checkpoint", default="", type=str) -parser.add_argument("--bert_config", default="", type=str) -parser.add_argument("--data_dir", required=True, type=str) parser.add_argument( - "--dataset_name", - required=True, - type=str, - choices=["sst-2", "imdb", "thucnews", "jarvis", "nlu-ubuntu", "nlu-web", "nlu-chat"], + "--use_cache", action='store_true', help="When specified loads and stores cache preprocessed data." ) parser.add_argument("--train_file_prefix", default='train', type=str) -parser.add_argument("--eval_file_prefix", default='test', type=str) -parser.add_argument("--work_dir", default='outputs', type=str) +parser.add_argument("--eval_file_prefix", default='dev', type=str) +parser.add_argument("--class_balancing", default="None", type=str, choices=["None", "weighted_loss"]) +parser.add_argument( + "--no_shuffle_data", action='store_false', dest="shuffle_data", help="Shuffle is enabled by default." 
+) parser.add_argument("--save_epoch_freq", default=1, type=int) parser.add_argument("--save_step_freq", default=-1, type=int) -parser.add_argument("--optimizer_kind", default="adam", type=str) -parser.add_argument("--amp_opt_level", default="O0", type=str, choices=["O0", "O1", "O2"]) -parser.add_argument("--do_lower_case", action='store_true') -parser.add_argument("--shuffle_data", action='store_true') -parser.add_argument("--class_balancing", default="None", type=str, choices=["None", "weighted_loss"]) +parser.add_argument('--loss_step_freq', default=25, type=int, help='Frequency of printing loss') +parser.add_argument('--eval_step_freq', default=100, type=int, help='Frequency of evaluation') +parser.add_argument("--local_rank", default=None, type=int) args = parser.parse_args() -work_dir = f'{args.work_dir}/{args.dataset_name.upper()}' nf = nemo.core.NeuralModuleFactory( backend=nemo.core.Backend.PyTorch, local_rank=args.local_rank, optimization_level=args.amp_opt_level, - log_dir=work_dir, + log_dir=args.work_dir, create_tb_writer=True, files_to_copy=[__file__], add_time_to_log_dir=True, ) -""" Load the pretrained BERT parameters -See the list of pretrained models, call: -nemo_nlp.huggingface.BERT.list_pretrained_models() -""" +model = nemo_nlp.nm.trainables.get_pretrained_lm_model( + pretrained_model_name=args.pretrained_model_name, + config=args.bert_config, + vocab=args.vocab_file, + checkpoint=args.bert_checkpoint, +) -if args.bert_checkpoint and args.bert_config: - pretrained_bert_model = nemo.collections.nlp.nm.trainables.common.huggingface.BERT( - config_filename=args.bert_config - ) - pretrained_bert_model.restore_from(args.bert_checkpoint) -else: - pretrained_bert_model = nemo.collections.nlp.nm.trainables.common.huggingface.BERT( - pretrained_model_name=args.pretrained_bert_model - ) +tokenizer = nemo.collections.nlp.data.tokenizers.get_tokenizer( + tokenizer_name=args.tokenizer, + pretrained_model_name=args.pretrained_model_name, + tokenizer_model=args.tokenizer_model, + vocab_file=args.vocab_file, + do_lower_case=args.do_lower_case, +) -hidden_size = pretrained_bert_model.hidden_size -tokenizer = BertTokenizer.from_pretrained(args.pretrained_bert_model) +hidden_size = model.hidden_size -data_desc = SentenceClassificationDataDesc(args.dataset_name, args.data_dir, args.do_lower_case) +data_desc = TextClassificationDataDesc(data_dir=args.data_dir, modes=[args.train_file_prefix, args.eval_file_prefix]) # Create sentence classification loss on top -classifier = nemo.collections.nlp.nm.trainables.common.sequence_classification_nm.SequenceClassifier( - hidden_size=hidden_size, num_classes=data_desc.num_labels, dropout=args.fc_dropout +classifier = nemo_nlp.nm.trainables.SequenceClassifier( + hidden_size=hidden_size, + num_classes=data_desc.num_labels, + dropout=args.fc_dropout, + num_layers=args.num_output_layers, + log_softmax=False, ) + if args.class_balancing == 'weighted_loss': # You may need to increase the number of epochs for convergence. 
- loss_fn = nemo.backends.pytorch.common.CrossEntropyLoss(weight=data_desc.class_weights) + loss_fn = nemo.backends.pytorch.common.CrossEntropyLossNM(weight=data_desc.class_weights) else: - loss_fn = nemo.backends.pytorch.common.CrossEntropyLoss() + loss_fn = nemo.backends.pytorch.common.CrossEntropyLossNM() -def create_pipeline(num_samples=-1, batch_size=32, num_gpus=1, local_rank=0, mode='train'): +def create_pipeline(num_samples=-1, batch_size=32, num_gpus=1, mode='train', is_training=True): logging.info(f"Loading {mode} data...") data_file = f'{data_desc.data_dir}/{mode}.tsv' - shuffle = args.shuffle_data if mode == 'train' else False - - data_layer = nemo.collections.nlp.nm.data_layers.text_classification_datalayer.BertSentenceClassificationDataLayer( + shuffle = args.shuffle_data if is_training else False + data_layer = nemo_nlp.nm.data_layers.BertTextClassificationDataLayer( input_file=data_file, tokenizer=tokenizer, max_seq_length=args.max_seq_length, num_samples=num_samples, shuffle=shuffle, batch_size=batch_size, + use_cache=args.use_cache, ) ids, type_ids, input_mask, labels = data_layer() @@ -130,14 +157,13 @@ def create_pipeline(num_samples=-1, batch_size=32, num_gpus=1, local_rank=0, mod batch_size = data_size steps_per_epoch = math.ceil(data_size / (batch_size * num_gpus)) - logging.info(f"Steps_per_epoch = {steps_per_epoch}") - hidden_states = pretrained_bert_model(input_ids=ids, token_type_ids=type_ids, attention_mask=input_mask) + hidden_states = model(input_ids=ids, token_type_ids=type_ids, attention_mask=input_mask) logits = classifier(hidden_states=hidden_states) loss = loss_fn(logits=logits, labels=labels) - if mode == 'train': + if is_training: tensors_to_evaluate = [loss, logits] else: tensors_to_evaluate = [logits, labels] @@ -149,24 +175,26 @@ def create_pipeline(num_samples=-1, batch_size=32, num_gpus=1, local_rank=0, mod num_samples=args.num_train_samples, batch_size=args.batch_size, num_gpus=args.num_gpus, - local_rank=args.local_rank, mode=args.train_file_prefix, + is_training=True, ) +logging.info(f"Steps_per_epoch = {steps_per_epoch}") + eval_tensors, _, _, data_layer = create_pipeline( num_samples=args.num_eval_samples, batch_size=args.batch_size, num_gpus=args.num_gpus, - local_rank=args.local_rank, mode=args.eval_file_prefix, + is_training=False, ) # Create callbacks for train and eval modes train_callback = nemo.core.SimpleLossLoggerCallback( tensors=train_tensors, - print_func=lambda x: str(np.round(x[0].item(), 3)), + print_func=lambda x: logging.info("Loss: {:.3f}".format(x[0].item())), tb_writer=nf.tb_writer, get_tb_values=lambda x: [["loss", x[0]]], - step_freq=steps_per_epoch, + step_freq=args.loss_step_freq, ) eval_callback = nemo.core.EvaluatorCallback( @@ -174,7 +202,7 @@ def create_pipeline(num_samples=-1, batch_size=32, num_gpus=1, local_rank=0, mod user_iter_callback=lambda x, y: eval_iter_callback(x, y, data_layer), user_epochs_done_callback=lambda x: eval_epochs_done_callback(x, f'{nf.work_dir}/graphs'), tb_writer=nf.tb_writer, - eval_step=steps_per_epoch, + eval_step=args.eval_step_freq, ) # Create callback to save checkpoints diff --git a/examples/nlp/token_classification/NERWithBERT.ipynb b/examples/nlp/token_classification/NERWithBERT.ipynb index c3a38da0e49a..e2da6723e760 100644 --- a/examples/nlp/token_classification/NERWithBERT.ipynb +++ b/examples/nlp/token_classification/NERWithBERT.ipynb @@ -16,15 +16,16 @@ "from nemo.collections.nlp.data import NemoBertTokenizer, SentencePieceTokenizer\n", "from 
nemo.collections.nlp.callbacks.token_classification_callback import \\\n", " eval_iter_callback, eval_epochs_done_callback\n", - "from nemo.collections.nlp.nm.losses import TokenClassificationLoss\n", - "from nemo.collections.nlp.nm.trainables import TokenClassifier" + "from nemo.backends.pytorch.common.losses import CrossEntropyLossNM\n", + "from nemo.collections.nlp.nm.trainables import TokenClassifier\n", + "from nemo import logging" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "You can download data from [here](https://github.com/kyzhouhzau/BERT-NER/tree/master/data) and use [this](https://github.com/NVIDIA/NeMo/blob/master/examples/nlp/scripts/convert_iob_format_to_token_classification_format.py) script to preprocess it." + "You can download data from [here](https://github.com/kyzhouhzau/BERT-NER/tree/master/data) and use [this](https://github.com/NVIDIA/NeMo/blob/master/examples/nlp/token_classification/import_from_iob_format.py) script to preprocess it." ] }, { @@ -42,6 +43,7 @@ "NUM_EPOCHS = 3\n", "LEARNING_RATE = 0.00005\n", "LR_WARMUP_PROPORTION = 0.1\n", + "PRETRAINED_BERT_MODEL = \"bert-base-cased\"\n", "OPTIMIZER = \"adam\"" ] }, @@ -78,10 +80,14 @@ "outputs": [], "source": [ "# If you're using a standard BERT model, you should do it like this. To see the full\n", - "# list of BERT model names, check out nemo_nlp.huggingface.BERT.list_pretrained_models()\n", - "tokenizer = NemoBertTokenizer(pretrained_model=\"bert-base-cased\")\n", - "bert_model = nemo_nlp.nm.trainables.huggingface.BERT(\n", - " pretrained_model_name=\"bert-base-cased\")" + "# list of MegatronBERT/BERT/ALBERT/RoBERTa model names, call nemo_nlp.nm.trainables.get_pretrained_lm_models_list()\n", + "\n", + "bert_model = nemo_nlp.nm.trainables.get_pretrained_lm_model(\n", + " pretrained_model_name=PRETRAINED_BERT_MODEL)\n", + "\n", + "tokenizer = nemo.collections.nlp.data.tokenizers.get_tokenizer(\n", + " tokenizer_name=\"nemobert\",\n", + " pretrained_model_name=PRETRAINED_BERT_MODEL)" ] }, { @@ -106,7 +112,7 @@ " num_classes=num_classes,\n", " dropout=CLASSIFICATION_DROPOUT)\n", "\n", - "ner_loss = TokenClassificationLoss(num_classes=len(label_ids))\n", + "ner_loss = CrossEntropyLossNM(logits_ndim=3)\n", "\n", "input_ids, input_type_ids, input_mask, loss_mask, _, labels = train_data_layer()\n", "\n", @@ -152,7 +158,7 @@ "source": [ "callback_train = nemo.core.SimpleLossLoggerCallback(\n", " tensors=[loss],\n", - " print_func=lambda x: print(\"Loss: {:.3f}\".format(x[0].item())))\n", + " print_func=lambda x: logging.info(\"Loss: {:.3f}\".format(x[0].item())))\n", "\n", "train_data_size = len(train_data_layer)\n", "\n", @@ -218,9 +224,18 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.4" + "version": "3.7.6" + }, + "pycharm": { + "stem_cell": { + "cell_type": "raw", + "metadata": { + "collapsed": false + }, + "source": [] + } } }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file diff --git a/examples/nlp/token_classification/PunctuationWithBERT.ipynb b/examples/nlp/token_classification/PunctuationWithBERT.ipynb index e4905b1d6277..77a62e563b70 100644 --- a/examples/nlp/token_classification/PunctuationWithBERT.ipynb +++ b/examples/nlp/token_classification/PunctuationWithBERT.ipynb @@ -17,12 +17,15 @@ "import nemo.collections.nlp as nemo_nlp\n", "from nemo.collections.nlp.data import NemoBertTokenizer\n", "from nemo.collections.nlp.nm.trainables import TokenClassifier\n", - "from 
nemo.collections.nlp.nm.losses import TokenClassificationLoss, LossAggregatorNM\n", + "from nemo.backends.pytorch.common.losses import CrossEntropyLossNM, LossAggregatorNM\n", "from nemo.collections.nlp.callbacks.punctuation_capitalization_callback import eval_iter_callback, eval_epochs_done_callback\n", - "from nemo.collections.nlp.utils.common_nlp_utils import calc_class_weights\n", + "from nemo.collections.nlp.data.datasets.datasets_utils import calc_class_weights\n", "\n", "DATA_DIR = \"PATH_TO_WHERE_THE_DATA_IS\"\n", "WORK_DIR = \"PATH_TO_WHERE_TO_STORE_CHECKPOINTS_AND_LOGS\"\n", + "\n", + "# See the list of available pre-trained models by calling\n", + "# the nemo_nlp.nm.trainables.get_bert_models_list()\n", "PRETRAINED_BERT_MODEL = \"bert-base-uncased\"\n", "\n", "# model parameters\n", @@ -50,7 +53,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "In this notebook we're going to use a subset of English examples from the [Tatoeba collection of sentences](https://tatoeba.org/eng), set NUM_SAMPLES=-1 and consider including other datasets to improve the performance of the model. Use [NeMo/nemo/collections/nlp/data/scripts/get_tatoeba_data.py](https://github.com/NVIDIA/NeMo/blob/master/examples/nlp/scripts/get_tatoeba.py) to download and preprocess the Tatoeba data." + "In this notebook we're going to use a subset of English examples from the [Tatoeba collection of sentences](https://tatoeba.org/eng), set NUM_SAMPLES=-1 and consider including other datasets to improve the performance of the model. Use [NeMo/examples/nlp/token_classification/get_tatoeba_data.py](https://github.com/NVIDIA/NeMo/blob/master/examples/nlp/token_classification/get_tatoeba_data.py) to download and preprocess the Tatoeba data." ] }, { @@ -61,7 +64,7 @@ "source": [ "# This should take about a minute since the data is already downloaded in the previous step\n", "\n", - "! python ../scripts/get_tatoeba.py --data_dir $DATA_DIR --num_sample $NUM_SAMPLES" + "! python get_tatoeba_data.py --data_dir $DATA_DIR --num_sample $NUM_SAMPLES" ] }, { @@ -117,10 +120,14 @@ "outputs": [], "source": [ "# If you're using a standard BERT model, you should do it like this. 
To see the full\n", - "# list of BERT model names, check out nemo_nlp.huggingface.BERT.list_pretrained_models()\n", + "# list of MegatronBERT/BERT/ALBERT/RoBERTa model names, call nemo_nlp.nm.trainables.get_pretrained_lm_models_list()\n", + "\n", + "bert_model = nemo_nlp.nm.trainables.get_pretrained_lm_model(\n", + " pretrained_model_name=PRETRAINED_BERT_MODEL)\n", "\n", - "tokenizer = NemoBertTokenizer(pretrained_model=PRETRAINED_BERT_MODEL)\n", - "bert_model = nemo_nlp.nm.trainables.huggingface.BERT(pretrained_model_name=PRETRAINED_BERT_MODEL)" + "tokenizer = nemo.collections.nlp.data.tokenizers.get_tokenizer(\n", + " tokenizer_name=\"nemobert\",\n", + " pretrained_model_name=PRETRAINED_BERT_MODEL)" ] }, { @@ -167,10 +174,8 @@ "class_weights = calc_class_weights(punct_label_freqs)\n", "\n", "# define loss\n", - "punct_loss = TokenClassificationLoss(\n", - " num_classes=len(punct_label_ids),\n", - " class_weights=class_weights)\n", - "capit_loss = TokenClassificationLoss(num_classes=len(capit_label_ids))\n", + "punct_loss = CrossEntropyLossNM(logits_ndim=3, weight=class_weights)\n", + "capit_loss = CrossEntropyLossNM(logits_ndim=3)\n", "task_loss = LossAggregatorNM(num_inputs=2)" ] }, @@ -258,7 +263,7 @@ "source": [ "callback_train = nemo.core.SimpleLossLoggerCallback(\n", " tensors=[task_loss, punct_loss, capit_loss, punct_logits, capit_logits],\n", - " print_func=lambda x: print(\"Loss: {:.3f}\".format(x[0].item())),\n", + " print_func=lambda x: logging.info(\"Loss: {:.3f}\".format(x[0].item())),\n", " step_freq=STEP_FREQ)\n", "\n", "train_data_size = len(train_data_layer)\n", @@ -479,9 +484,18 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.4" + "version": "3.7.6" + }, + "pycharm": { + "stem_cell": { + "cell_type": "raw", + "metadata": { + "collapsed": false + }, + "source": [] + } } }, "nbformat": 4, "nbformat_minor": 2 -} +} \ No newline at end of file diff --git a/examples/nlp/token_classification/get_medical_data.py b/examples/nlp/token_classification/get_medical_data.py new file mode 100644 index 000000000000..bc213d06f360 --- /dev/null +++ b/examples/nlp/token_classification/get_medical_data.py @@ -0,0 +1,74 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + + +import argparse +import logging +import os +import subprocess + +from nemo import logging + +URL = { + 'bc5cdr': 'https://drive.google.com/uc?id=1OletxmPYNkz2ltOr9pyT0b0iBtUWxslh', + 'ncbi': 'https://drive.google.com/uc?id=1OletxmPYNkz2ltOr9pyT0b0iBtUWxslh', +} + + +def __maybe_download_file(destination: str, dataset: str): + """ + Downloads data from https://github.com/dmis-lab/biobert#datasets named entity recognition to destination if not exists. 
+ If exists, skips download + Args: + destination: local filepath + dataset: dataset + """ + parent_source, child_source = dataset.split("-") + download_url = URL[parent_source] + if not os.path.exists(destination): + logging.info(f'Downloading {download_url} from https://github.com/dmis-lab/biobert#datasets to {destination}') + tmp_zip = '/tmp/data.zip' + tmp_unzip = '/tmp/data' + if not os.path.exists(tmp_unzip): + os.makedirs(tmp_unzip) + else: + subprocess.run(['rm', '-rf', tmp_unzip]) + subprocess.run(['gdown', '-O', tmp_zip, download_url]) + subprocess.run(['unzip', tmp_zip, '-d', tmp_unzip]) + + subprocess.run(['mv', os.path.join(tmp_unzip, f"{parent_source.upper()}-{child_source}"), destination]) + if os.path.exists(os.path.join(destination, "devel.tsv")): + subprocess.run(['mv', os.path.join(destination, "devel.tsv"), os.path.join(destination, "dev.tsv")]) + subprocess.run(['rm', '-rf', tmp_zip]) + subprocess.run(['rm', '-rf', tmp_unzip]) + else: + logging.info(f'{destination} found. Skipping download') + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Prepare dataset') + parser.add_argument("--data_dir", required=True, type=str) + parser.add_argument( + "--dataset", default='bc5cdr-chem', choices=['bc5cdr-chem', 'bc5cdr-disease', 'ncbi-disease'], type=str + ) + args = parser.parse_args() + + if not os.path.exists(args.data_dir): + os.makedirs(args.data_dir) + + logging.info(f'Downloading dataset') + data_dir = os.path.join(args.data_dir, args.dataset) + __maybe_download_file(data_dir, args.dataset) diff --git a/examples/nlp/scripts/get_tatoeba.py b/examples/nlp/token_classification/get_tatoeba_data.py similarity index 93% rename from examples/nlp/scripts/get_tatoeba.py rename to examples/nlp/token_classification/get_tatoeba_data.py index 54cbefe94e1b..522ffcafb318 100644 --- a/examples/nlp/scripts/get_tatoeba.py +++ b/examples/nlp/token_classification/get_tatoeba_data.py @@ -20,7 +20,7 @@ import random import re import string -import urllib.request +import subprocess from nemo import logging @@ -37,16 +37,10 @@ def __maybe_download_file(destination: str, source: str): """ source = URL[source] if not os.path.exists(destination): - logging.info(f'Downloading {source}') - logging.info( - f'Downloading could take a long time ' - + 'To get the data faster consider running in a terminal:\n' - + 'wget https://downloads.tatoeba.org/exports/sentences.csv\n' - + 'grep -P "\teng\t" sentences.csv > eng_sentences.csv\n' - + 'mv eng_sentences.csv sentences.csv\n' - + 'And then rerun this script to preprocess the data.' - ) - urllib.request.urlretrieve(source, filename=destination) + logging.info(f'Downloading {source} to {destination}') + subprocess.run(['wget', '-O', destination, source]) + else: + logging.info(f'{destination} found. Skipping download') def __process_english_sentences(in_file, out_file, percent_to_cut=0, num_to_combine=1, num_samples=-1): diff --git a/examples/nlp/token_classification/import_from_iob_format.py b/examples/nlp/token_classification/import_from_iob_format.py new file mode 100644 index 000000000000..0050e06fb69b --- /dev/null +++ b/examples/nlp/token_classification/import_from_iob_format.py @@ -0,0 +1,115 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+
+import argparse
+import os
+
+from nemo import logging
+
+
+def __convert_data(in_file, out_text, out_labels, max_length):
+    """
+    in_file should be in the IOB format, see example here:
+    https://www.clips.uantwerpen.be/conll2003/ner/.
+
+    After the conversion, the dataset is split into two files: text.txt
+    and labels.txt.
+    Each line of the text.txt file contains a text sequence, with words
+    separated by spaces. The labels.txt file contains the corresponding
+    label for each word in text.txt; the labels are also separated by spaces.
+    Each line of the files should follow the format:
+    [WORD] [SPACE] [WORD] [SPACE] [WORD] (for text.txt) and
+    [LABEL] [SPACE] [LABEL] [SPACE] [LABEL] (for labels.txt).
+
+    """
+    in_file = open(in_file, 'r')
+
+    if max_length == -1:
+        with open(out_text, 'w') as out_text, open(out_labels, 'w') as out_labels:
+            for line in in_file:
+                if line == '\n':
+                    out_text.write(line)
+                    out_labels.write(line)
+                else:
+                    line = line.split()
+                    out_text.write(line[0] + ' ')
+                    out_labels.write(line[-1] + ' ')
+
+    else:
+        lines = []
+        words = []
+        labels = []
+        with open(out_text, 'w') as out_text, open(out_labels, 'w') as out_labels:
+            lines = in_file.readlines()
+            for line_id, line in enumerate(lines):
+                logging.info(f"{line_id} {len(lines)}")
+                contends = line.strip()
+                if len(contends) == 0:
+                    assert len(words) == len(labels)
+                    if len(words) > max_length:
+                        # split the sentence if it is longer than max_length
+                        while len(words) > max_length:
+                            tmplabel = labels[:max_length]
+                            for iidx in range(len(tmplabel)):
+                                if tmplabel.pop() == 'O':
+                                    break
+                            l = ' '.join([label for label in labels[: len(tmplabel) + 1] if len(label) > 0])
+                            w = ' '.join([word for word in words[: len(tmplabel) + 1] if len(word) > 0])
+                            # lines.append([l, w])
+                            out_text.write(w + "\n")
+                            out_labels.write(l + "\n")
+                            words = words[len(tmplabel) + 1 :]
+                            labels = labels[len(tmplabel) + 1 :]
+
+                    if len(words) == 0:
+                        continue
+                    l = ' '.join([label for label in labels if len(label) > 0])
+                    w = ' '.join([word for word in words if len(word) > 0])
+                    # lines.append([l, w])
+                    out_text.write(w + "\n")
+                    out_labels.write(l + "\n")
+                    words = []
+                    labels = []
+                    continue
+
+                word = line.strip().split()[0]
+                label = line.strip().split()[-1]
+                words.append(word)
+                labels.append(label)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description='Convert data from IOB '
+        + 'format to the format compatible with '
+        + 'nlp/examples/token_classification.py'
+    )
+    parser.add_argument("--data_file", required=True, type=str)
+    parser.add_argument("--max_length", default=-1, type=int)
+    args = parser.parse_args()
+
+    data_dir = os.path.dirname(args.data_file)
+    basename = os.path.basename(args.data_file)
+    prefix, ext = os.path.splitext(basename)
+    if not os.path.exists(args.data_file):
+        raise FileNotFoundError(f"{args.data_file} not found in {data_dir}")
+
+    logging.info(f'Processing {args.data_file}')
+    out_text = os.path.join(data_dir, 'text_' + prefix + '.txt')
+    out_labels = os.path.join(data_dir, 'labels_' + prefix + '.txt')
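+    # For example, --data_file dev.tsv produces text_dev.txt and labels_dev.txt
+    # (space-separated words and labels, one sentence per line) in the same directory as the input.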
+ + __convert_data(args.data_file, out_text, out_labels, args.max_length) + logging.info(f'Processing of the {args.data_file} is complete') diff --git a/examples/nlp/token_classification/punctuation_capitalization.py b/examples/nlp/token_classification/punctuation_capitalization.py index b74eeff89663..549c60486cd9 100644 --- a/examples/nlp/token_classification/punctuation_capitalization.py +++ b/examples/nlp/token_classification/punctuation_capitalization.py @@ -15,19 +15,18 @@ # ============================================================================= import argparse -import json import os +import nemo import nemo.collections.nlp as nemo_nlp -import nemo.collections.nlp.utils.common_nlp_utils from nemo import logging +from nemo.backends.pytorch.common.losses import CrossEntropyLossNM, LossAggregatorNM from nemo.collections.nlp.callbacks.punctuation_capitalization_callback import ( eval_epochs_done_callback, eval_iter_callback, ) -from nemo.collections.nlp.data import NemoBertTokenizer, SentencePieceTokenizer +from nemo.collections.nlp.data.datasets.datasets_utils import calc_class_weights from nemo.collections.nlp.nm.data_layers import PunctuationCapitalizationDataLayer -from nemo.collections.nlp.nm.losses.token_classification_loss import TokenClassificationLoss from nemo.collections.nlp.nm.trainables import TokenClassifier from nemo.utils.lr_policies import get_lr_policy @@ -53,15 +52,21 @@ parser.add_argument("--ignore_start_end", action='store_false') parser.add_argument("--ignore_extra_tokens", action='store_false') parser.add_argument("--none_label", default='O', type=str) -parser.add_argument("--shuffle_data", action='store_true') -parser.add_argument("--pretrained_bert_model", default="bert-base-uncased", type=str) +parser.add_argument("--no_shuffle_data", action='store_false', dest="shuffle_data") +parser.add_argument( + "--pretrained_model_name", + default="bert-base-uncased", + type=str, + help="Name of the pre-trained model", + choices=nemo_nlp.nm.trainables.get_pretrained_lm_models_list(), +) parser.add_argument("--bert_checkpoint", default=None, type=str) parser.add_argument("--bert_config", default=None, type=str, help="Path to bert config file in json format") parser.add_argument("--punct_classifier_checkpoint", default=None, type=str) parser.add_argument("--capit_classifier_checkpoint", default=None, type=str) parser.add_argument( "--tokenizer_model", - default="tokenizer.model", + default=None, type=str, help="Path to pretrained tokenizer model, \ only used if --tokenizer is sentencepiece", @@ -74,6 +79,15 @@ help="tokenizer to use, \ only relevant when using custom pretrained checkpoint.", ) +parser.add_argument( + "--vocab_file", default=None, help="Path to the vocab file. Required for pretrained Megatron models" +) +parser.add_argument( + "--do_lower_case", + action='store_true', + help="Whether to lower case the input text. True for uncased models, False for cased models. " + + "Only applicable when tokenizer is build with vocab file", +) parser.add_argument( "--work_dir", default='output', @@ -123,32 +137,20 @@ output_file = f'{nf.work_dir}/output.txt' -if args.bert_checkpoint is None: - """ Use this if you're using a standard BERT model. 
- To see the list of pretrained models, call: - nemo_nlp.huggingface.BERT.list_pretrained_models() - """ - tokenizer = NemoBertTokenizer(args.pretrained_bert_model) - model = nemo_nlp.nm.trainables.huggingface.BERT(pretrained_model_name=args.pretrained_bert_model) -else: - """ Use this if you're using a BERT model that you pre-trained yourself. - """ - if args.tokenizer == "sentencepiece": - special_tokens = nemo_nlp.utils.MODEL_SPECIAL_TOKENS['bert'] - tokenizer = SentencePieceTokenizer(model_path=args.tokenizer_model, special_tokens=special_tokens) - elif args.tokenizer == "nemobert": - tokenizer = NemoBertTokenizer(args.pretrained_bert_model) - else: - raise ValueError(f"received unexpected tokenizer '{args.tokenizer}'") - if args.bert_config is not None: - with open(args.bert_config) as json_file: - config = json.load(json_file) - model = nemo_nlp.nm.trainables.huggingface.BERT(**config) - else: - model = nemo_nlp.nm.trainables.huggingface.BERT(pretrained_model_name=args.pretrained_bert_model) +model = nemo_nlp.nm.trainables.get_pretrained_lm_model( + pretrained_model_name=args.pretrained_model_name, + config=args.bert_config, + vocab=args.vocab_file, + checkpoint=args.bert_checkpoint, +) - model.restore_from(args.bert_checkpoint) - logging.info(f"Model restored from {args.bert_checkpoint}") +tokenizer = nemo.collections.nlp.data.tokenizers.get_tokenizer( + tokenizer_name=args.tokenizer, + pretrained_model_name=args.pretrained_model_name, + tokenizer_model=args.tokenizer_model, + vocab_file=args.vocab_file, + do_lower_case=args.do_lower_case, +) hidden_size = model.hidden_size @@ -214,7 +216,7 @@ def create_pipeline( if args.use_weighted_loss_punct: logging.info(f"Using weighted loss for punctuation task") punct_label_freqs = data_layer.dataset.punct_label_frequencies - class_weights = nemo.collections.nlp.utils.common_nlp_utils.calc_class_weights(punct_label_freqs) + class_weights = calc_class_weights(punct_label_freqs) # Initialize punctuation loss punct_classifier = punct_classifier( @@ -225,15 +227,15 @@ def create_pipeline( name='Punctuation', ) - punct_loss = TokenClassificationLoss(num_classes=len(punct_label_ids), class_weights=class_weights) + punct_loss = CrossEntropyLossNM(logits_ndim=3, weight=class_weights) # Initialize capitalization loss capit_classifier = capit_classifier( hidden_size=hidden_size, num_classes=len(capit_label_ids), dropout=dropout, name='Capitalization' ) - capit_loss = TokenClassificationLoss(num_classes=len(capit_label_ids)) + capit_loss = CrossEntropyLossNM(logits_ndim=3) - task_loss = nemo_nlp.nm.losses.LossAggregatorNM(num_inputs=2) + task_loss = LossAggregatorNM(num_inputs=2) hidden_states = model(input_ids=input_ids, token_type_ids=input_type_ids, attention_mask=input_mask) @@ -278,8 +280,9 @@ def create_pipeline( # Create trainer and execute training action train_callback = nemo.core.SimpleLossLoggerCallback( tensors=losses + train_logits, - print_func=lambda x: print("Loss: {:.3f}".format(x[0].item())), + print_func=lambda x: logging.info("Loss: {:.3f}".format(x[0].item())), get_tb_values=lambda x: [["loss", x[0]]], + step_freq=args.loss_step_freq, tb_writer=nf.tb_writer, ) @@ -306,5 +309,5 @@ def create_pipeline( callbacks=[train_callback, eval_callback, ckpt_callback], lr_policy=lr_policy_fn, optimizer=args.optimizer_kind, - optimization_params={"num_epochs": args.num_epochs, "lr": args.lr}, + optimization_params={"num_epochs": args.num_epochs, "lr": args.lr, "weight_decay": args.weight_decay}, ) diff --git 
a/examples/nlp/token_classification/punctuation_capitalization_infer.py b/examples/nlp/token_classification/punctuation_capitalization_infer.py index 9c2f8bede33c..01783ee22c60 100644 --- a/examples/nlp/token_classification/punctuation_capitalization_infer.py +++ b/examples/nlp/token_classification/punctuation_capitalization_infer.py @@ -22,16 +22,41 @@ import nemo import nemo.collections.nlp as nemo_nlp from nemo import logging -from nemo.collections.nlp.data import NemoBertTokenizer from nemo.collections.nlp.nm.data_layers import BertTokenClassificationInferDataLayer -from nemo.collections.nlp.utils.common_nlp_utils import get_vocab +from nemo.collections.nlp.utils.data_utils import get_vocab # Parsing arguments parser = argparse.ArgumentParser(description='Punctuation and capitalization detection inference') parser.add_argument("--max_seq_length", default=128, type=int) -parser.add_argument("--fc_dropout", default=0, type=float) parser.add_argument("--punct_num_fc_layers", default=3, type=int) -parser.add_argument("--pretrained_bert_model", default="bert-base-uncased", type=str) +parser.add_argument( + "--pretrained_model_name", + default="bert-base-uncased", + type=str, + help="Name of the pre-trained model", + choices=nemo_nlp.nm.trainables.get_pretrained_lm_models_list(), +) +parser.add_argument("--bert_config", default=None, type=str, help="Path to bert config file in json format") +parser.add_argument( + "--tokenizer_model", + default=None, + type=str, + help="Path to pretrained tokenizer model, only used if --tokenizer is sentencepiece", +) +parser.add_argument( + "--tokenizer", + default="nemobert", + type=str, + choices=["nemobert", "sentencepiece"], + help="tokenizer to use, only relevant when using custom pretrained checkpoint.", +) +parser.add_argument("--vocab_file", default=None, help="Path to the vocab file.") +parser.add_argument( + "--do_lower_case", + action='store_true', + help="Whether to lower case the input text. True for uncased models, False for cased models. 
" + + "Only applicable when tokenizer is build with vocab file", +) parser.add_argument("--none_label", default='O', type=str) parser.add_argument( "--queries", @@ -54,7 +79,7 @@ help="Whether to take predicted label in brackets or \ just append to word in the output", ) -parser.add_argument("--checkpoints_dir", default='output/checkpoints', type=str) +parser.add_argument("--checkpoint_dir", default='output/checkpoints', type=str) parser.add_argument( "--punct_labels_dict", default='punct_label_ids.csv', @@ -69,33 +94,36 @@ help='This file is generated during training \ when the datalayer is created', ) -parser.add_argument("--amp_opt_level", default="O0", type=str, choices=["O0", "O1", "O2"]) args = parser.parse_args() -if not os.path.exists(args.checkpoints_dir): - raise ValueError(f'Checkpoints folder not found at {args.checkpoints_dir}') +if not os.path.exists(args.checkpoint_dir): + raise ValueError(f'Checkpoints folder not found at {args.checkpoint_dir}') if not (os.path.exists(args.punct_labels_dict) and os.path.exists(args.capit_labels_dict)): raise ValueError( f'Dictionary with ids to labels not found at {args.punct_labels_dict} \ or {args.punct_labels_dict}' ) -nf = nemo.core.NeuralModuleFactory( - backend=nemo.core.Backend.PyTorch, optimization_level=args.amp_opt_level, log_dir=None -) +nf = nemo.core.NeuralModuleFactory(backend=nemo.core.Backend.PyTorch, log_dir=None) punct_labels_dict = get_vocab(args.punct_labels_dict) capit_labels_dict = get_vocab(args.capit_labels_dict) -""" Load the pretrained BERT parameters -See the list of pretrained models, call: -nemo.collections.nlp.BERT.list_pretrained_models() -""" -pretrained_bert_model = nemo_nlp.nm.trainables.huggingface.BERT(pretrained_model_name=args.pretrained_bert_model) -hidden_size = pretrained_bert_model.hidden_size -tokenizer = NemoBertTokenizer(args.pretrained_bert_model) +model = nemo_nlp.nm.trainables.get_pretrained_lm_model( + pretrained_model_name=args.pretrained_model_name, config=args.bert_config, vocab=args.vocab_file +) + +tokenizer = nemo.collections.nlp.data.tokenizers.get_tokenizer( + tokenizer_name=args.tokenizer, + pretrained_model_name=args.pretrained_model_name, + tokenizer_model=args.tokenizer_model, + vocab_file=args.vocab_file, + do_lower_case=args.do_lower_case, +) + +hidden_size = model.hidden_size data_layer = BertTokenClassificationInferDataLayer( queries=args.queries, tokenizer=tokenizer, max_seq_length=args.max_seq_length, batch_size=1 @@ -104,18 +132,17 @@ punct_classifier = nemo_nlp.nm.trainables.TokenClassifier( hidden_size=hidden_size, num_classes=len(punct_labels_dict), - dropout=args.fc_dropout, num_layers=args.punct_num_fc_layers, name='Punctuation', ) capit_classifier = nemo_nlp.nm.trainables.TokenClassifier( - hidden_size=hidden_size, num_classes=len(capit_labels_dict), dropout=args.fc_dropout, name='Capitalization' + hidden_size=hidden_size, num_classes=len(capit_labels_dict), name='Capitalization' ) input_ids, input_type_ids, input_mask, loss_mask, subtokens_mask = data_layer() -hidden_states = pretrained_bert_model(input_ids=input_ids, token_type_ids=input_type_ids, attention_mask=input_mask) +hidden_states = model(input_ids=input_ids, token_type_ids=input_type_ids, attention_mask=input_mask) punct_logits = punct_classifier(hidden_states=hidden_states) capit_logits = capit_classifier(hidden_states=hidden_states) @@ -123,7 +150,7 @@ ########################################################################### # Instantiate an optimizer to perform `infer` action -evaluated_tensors = 
nf.infer(tensors=[punct_logits, capit_logits, subtokens_mask], checkpoint_dir=args.checkpoints_dir) +evaluated_tensors = nf.infer(tensors=[punct_logits, capit_logits, subtokens_mask], checkpoint_dir=args.checkpoint_dir) def concatenate(lists): diff --git a/examples/nlp/token_classification/token_classification.py b/examples/nlp/token_classification/token_classification.py index 7254929863f1..e4b87502bbcd 100644 --- a/examples/nlp/token_classification/token_classification.py +++ b/examples/nlp/token_classification/token_classification.py @@ -14,49 +14,80 @@ # limitations under the License. # ============================================================================= +""" +Tutorial on how to use this script to solve NER task could be found here: +https://nvidia.github.io/NeMo/nlp/intro.html#named-entity-recognition +""" + import argparse -import json import os import nemo.collections.nlp as nemo_nlp -import nemo.collections.nlp.utils.common_nlp_utils +import nemo.collections.nlp.utils.data_utils from nemo import logging +from nemo.backends.pytorch.common.losses import CrossEntropyLossNM from nemo.collections.nlp.callbacks.token_classification_callback import eval_epochs_done_callback, eval_iter_callback -from nemo.collections.nlp.data import NemoBertTokenizer, SentencePieceTokenizer +from nemo.collections.nlp.data.datasets.datasets_utils.data_preprocessing import calc_class_weights from nemo.collections.nlp.nm.data_layers import BertTokenClassificationDataLayer -from nemo.collections.nlp.nm.losses import TokenClassificationLoss from nemo.collections.nlp.nm.trainables import TokenClassifier from nemo.utils.lr_policies import get_lr_policy # Parsing arguments +"""Provide extra arguments required for tasks.""" parser = argparse.ArgumentParser(description="Token classification with pretrained BERT") parser.add_argument("--local_rank", default=None, type=int) -parser.add_argument("--batch_size", default=8, type=int) -parser.add_argument("--max_seq_length", default=128, type=int) + +# training arguments +parser.add_argument( + "--work_dir", + default='output', + type=str, + help="The output directory where the model prediction and checkpoints will be written.", +) +parser.add_argument("--no_time_to_log_dir", action="store_true", help="whether to add time to work_dir or not") parser.add_argument("--num_gpus", default=1, type=int) parser.add_argument("--num_epochs", default=5, type=int) +parser.add_argument("--amp_opt_level", default="O0", type=str, choices=["O0", "O1", "O2"]) +parser.add_argument( + "--save_epoch_freq", + default=1, + type=int, + help="Frequency of saving checkpoint '-1' - step checkpoint won't be saved", +) +parser.add_argument( + "--save_step_freq", + default=-1, + type=int, + help="Frequency of saving checkpoint '-1' - step checkpoint won't be saved", +) +parser.add_argument( + "--eval_step_freq", default=-1, type=int, help="Frequency of evaluation, -1 to evaluate every epoch" +) +parser.add_argument("--loss_step_freq", default=250, type=int, help="Frequency of printing loss") +parser.add_argument("--use_weighted_loss", action='store_true', help="Flag to indicate whether to use weighted loss") + +# learning rate arguments parser.add_argument("--lr_warmup_proportion", default=0.1, type=float) parser.add_argument("--lr", default=5e-5, type=float) parser.add_argument("--lr_policy", default="WarmupAnnealing", type=str) -parser.add_argument("--weight_decay", default=0, type=float) +parser.add_argument("--weight_decay", default=0.01, type=float) 
parser.add_argument("--optimizer_kind", default="adam", type=str) -parser.add_argument("--amp_opt_level", default="O0", type=str, choices=["O0", "O1", "O2"]) -parser.add_argument("--data_dir", default="/data", type=str) + +# task specific arguments parser.add_argument("--fc_dropout", default=0.5, type=float) parser.add_argument("--num_fc_layers", default=2, type=int) + +# data arguments +parser.add_argument("--data_dir", default="/data", type=str) +parser.add_argument("--max_seq_length", default=128, type=int) parser.add_argument("--ignore_start_end", action='store_false') parser.add_argument("--ignore_extra_tokens", action='store_false') parser.add_argument("--none_label", default='O', type=str) -parser.add_argument("--shuffle_data", action='store_false') -parser.add_argument("--pretrained_bert_model", default="bert-base-cased", type=str) -parser.add_argument("--bert_checkpoint", default=None, type=str) -parser.add_argument("--bert_config", default=None, type=str, help="Path to bert config file in json format") -parser.add_argument( - "--tokenizer_model", - default="tokenizer.model", - type=str, - help="Path to pretrained tokenizer model, only used if --tokenizer is sentencepiece", -) +parser.add_argument("--mode", default='train_eval', choices=["train_eval", "train"], type=str) +parser.add_argument("--no_shuffle_data", action='store_false', dest="shuffle_data") +parser.add_argument("--use_cache", action='store_true', help="Whether to cache preprocessed data") +parser.add_argument("--batch_size", default=8, type=int, help="Batch size") +parser.add_argument("--batches_per_step", default=1, type=int, help="Number of iterations per step.") parser.add_argument( "--tokenizer", default="nemobert", @@ -65,28 +96,35 @@ help="tokenizer to use, only relevant when using custom pretrained checkpoint.", ) parser.add_argument( - "--work_dir", - default='output', + "--vocab_file", default=None, help="Path to the vocab file. Required for pretrained Megatron models" +) +parser.add_argument( + "--tokenizer_model", + default=None, type=str, - help="The output directory where the model prediction and checkpoints will be written.", + help="Path to pretrained tokenizer model, only used if --tokenizer is sentencepiece", ) -parser.add_argument("--use_cache", action='store_true', help="Whether to cache preprocessed data") parser.add_argument( - "--save_epoch_freq", - default=1, - type=int, - help="Frequency of saving checkpoint '-1' - step checkpoint won't be saved", + "--do_lower_case", + action='store_true', + help="Whether to lower case the input text. True for uncased models, False for cased models. 
" + + "Only applicable when tokenizer is build with vocab file", ) + +# model arguments parser.add_argument( - "--save_step_freq", - default=-1, - type=int, - help="Frequency of saving checkpoint '-1' - step checkpoint won't be saved", + "--pretrained_model_name", + default="bert-base-uncased", + type=str, + help="Name of the pre-trained model", + choices=nemo_nlp.nm.trainables.get_pretrained_lm_models_list(), ) -parser.add_argument("--loss_step_freq", default=250, type=int, help="Frequency of printing loss") -parser.add_argument("--use_weighted_loss", action='store_true', help="Flag to indicate whether to use weighted loss") +parser.add_argument("--bert_checkpoint", default=None, type=str, help="Path to bert pretrained checkpoint") +parser.add_argument("--bert_config", default=None, type=str, help="Path to bert config file in json format") + args = parser.parse_args() +logging.info(args) if not os.path.exists(args.data_dir): raise FileNotFoundError( @@ -103,42 +141,29 @@ log_dir=args.work_dir, create_tb_writer=True, files_to_copy=[__file__], - add_time_to_log_dir=True, + add_time_to_log_dir=not args.no_time_to_log_dir, ) -logging.info(args) - output_file = f'{nf.work_dir}/output.txt' -if args.bert_checkpoint is None: - """ Use this if you're using a standard BERT model. - To see the list of pretrained models, call: - nemo_nlp.nm.trainables.huggingface.BERT.list_pretrained_models() - """ - tokenizer = NemoBertTokenizer(args.pretrained_bert_model) - model = nemo_nlp.nm.trainables.huggingface.BERT(pretrained_model_name=args.pretrained_bert_model) -else: - """ Use this if you're using a BERT model that you pre-trained yourself. - """ - if args.tokenizer == "sentencepiece": - special_tokens = nemo_nlp.utils.MODEL_SPECIAL_TOKENS['bert'] - tokenizer = SentencePieceTokenizer(model_path=args.tokenizer_model) - elif args.tokenizer == "nemobert": - tokenizer = NemoBertTokenizer(args.pretrained_bert_model) - else: - raise ValueError(f"received unexpected tokenizer '{args.tokenizer}'") - if args.bert_config is not None: - with open(args.bert_config) as json_file: - config = json.load(json_file) - model = nemo_nlp.nm.trainables.huggingface.BERT(**config) - else: - model = nemo_nlp.nm.trainables.huggingface.BERT(pretrained_model_name=args.pretrained_bert_model) - model.restore_from(args.bert_checkpoint) - logging.info(f"Model restored from {args.bert_checkpoint}") +model = nemo_nlp.nm.trainables.get_pretrained_lm_model( + pretrained_model_name=args.pretrained_model_name, + config=args.bert_config, + vocab=args.vocab_file, + checkpoint=args.bert_checkpoint, +) hidden_size = model.hidden_size +tokenizer = nemo.collections.nlp.data.tokenizers.get_tokenizer( + tokenizer_name=args.tokenizer, + pretrained_model_name=args.pretrained_model_name, + tokenizer_model=args.tokenizer_model, + vocab_file=args.vocab_file, + do_lower_case=args.do_lower_case, +) + def create_pipeline( pad_label=args.none_label, @@ -146,6 +171,7 @@ def create_pipeline( batch_size=args.batch_size, num_gpus=args.num_gpus, mode='train', + batches_per_step=args.batches_per_step, label_ids=None, ignore_extra_tokens=args.ignore_extra_tokens, ignore_start_end=args.ignore_start_end, @@ -197,22 +223,20 @@ def create_pipeline( if args.use_weighted_loss: logging.info(f"Using weighted loss") label_freqs = data_layer.dataset.label_frequencies - class_weights = nemo.collections.nlp.utils.common_nlp_utils.calc_class_weights(label_freqs) - - logging.info(f"class_weights: {class_weights}") + class_weights = calc_class_weights(label_freqs) classifier = 
classifier( hidden_size=hidden_size, num_classes=len(label_ids), dropout=dropout, num_layers=num_layers ) - task_loss = TokenClassificationLoss(num_classes=len(label_ids), class_weights=class_weights) + task_loss = CrossEntropyLossNM(logits_ndim=3, weight=class_weights) hidden_states = model(input_ids=input_ids, token_type_ids=input_type_ids, attention_mask=input_mask) logits = classifier(hidden_states=hidden_states) if mode == 'train': loss = task_loss(logits=logits, labels=labels, loss_mask=loss_mask) - steps_per_epoch = len(data_layer) // (batch_size * num_gpus) + steps_per_epoch = len(data_layer) // (batch_size * num_gpus * batches_per_step) tensors_to_evaluate = [loss, logits] return tensors_to_evaluate, loss, steps_per_epoch, label_ids, classifier else: @@ -220,31 +244,35 @@ def create_pipeline( return tensors_to_evaluate, data_layer +callbacks = [] train_tensors, train_loss, steps_per_epoch, label_ids, classifier = create_pipeline() - -eval_tensors, data_layer = create_pipeline(mode='dev', label_ids=label_ids, classifier=classifier) - logging.info(f"steps_per_epoch = {steps_per_epoch}") - # Create trainer and execute training action train_callback = nemo.core.SimpleLossLoggerCallback( tensors=train_tensors, - print_func=lambda x: print("Loss: {:.3f}".format(x[0].item())), + print_func=lambda x: logging.info("Loss: {:.3f}".format(x[0].item())), get_tb_values=lambda x: [["loss", x[0]]], + step_freq=args.loss_step_freq, tb_writer=nf.tb_writer, ) +callbacks.append(train_callback) -eval_callback = nemo.core.EvaluatorCallback( - eval_tensors=eval_tensors, - user_iter_callback=lambda x, y: eval_iter_callback(x, y), - user_epochs_done_callback=lambda x: eval_epochs_done_callback(x, label_ids, f'{nf.work_dir}/graphs'), - tb_writer=nf.tb_writer, - eval_step=steps_per_epoch, -) + +if "eval" in args.mode: + eval_tensors, data_layer = create_pipeline(mode='dev', label_ids=label_ids, classifier=classifier) + eval_callback = nemo.core.EvaluatorCallback( + eval_tensors=eval_tensors, + user_iter_callback=lambda x, y: eval_iter_callback(x, y), + user_epochs_done_callback=lambda x: eval_epochs_done_callback(x, label_ids, f'{nf.work_dir}/graphs'), + tb_writer=nf.tb_writer, + eval_step=args.eval_step_freq if args.eval_step_freq > 0 else steps_per_epoch, + ) + callbacks.append(eval_callback) ckpt_callback = nemo.core.CheckpointCallback( folder=nf.checkpoint_dir, epoch_freq=args.save_epoch_freq, step_freq=args.save_step_freq ) +callbacks.append(ckpt_callback) lr_policy_fn = get_lr_policy( args.lr_policy, total_steps=args.num_epochs * steps_per_epoch, warmup_ratio=args.lr_warmup_proportion @@ -252,8 +280,9 @@ def create_pipeline( nf.train( tensors_to_optimize=[train_loss], - callbacks=[train_callback, eval_callback, ckpt_callback], + callbacks=callbacks, lr_policy=lr_policy_fn, + batches_per_step=args.batches_per_step, optimizer=args.optimizer_kind, - optimization_params={"num_epochs": args.num_epochs, "lr": args.lr}, + optimization_params={"num_epochs": args.num_epochs, "lr": args.lr, "weight_decay": args.weight_decay}, ) diff --git a/examples/nlp/token_classification/token_classification_infer.py b/examples/nlp/token_classification/token_classification_infer.py index f1d7d1bf5cdf..66bfa0f25f06 100644 --- a/examples/nlp/token_classification/token_classification_infer.py +++ b/examples/nlp/token_classification/token_classification_infer.py @@ -22,15 +22,42 @@ import nemo import nemo.collections.nlp as nemo_nlp from nemo import logging -from nemo.collections.nlp.data import NemoBertTokenizer from 
nemo.collections.nlp.nm.trainables import TokenClassifier -from nemo.collections.nlp.utils.common_nlp_utils import get_vocab +from nemo.collections.nlp.utils.data_utils import get_vocab # Parsing arguments parser = argparse.ArgumentParser(description='NER with pretrained BERT') parser.add_argument("--max_seq_length", default=128, type=int) -parser.add_argument("--fc_dropout", default=0, type=float) -parser.add_argument("--pretrained_bert_model", default="bert-base-cased", type=str) +parser.add_argument( + "--pretrained_model_name", + default="bert-base-uncased", + type=str, + help="Name of the pre-trained model", + choices=nemo_nlp.nm.trainables.get_pretrained_lm_models_list(), +) +parser.add_argument("--bert_config", default=None, type=str, help="Path to bert config file in json format") +parser.add_argument( + "--tokenizer", + default="nemobert", + type=str, + choices=["nemobert", "sentencepiece"], + help="tokenizer to use, only relevant when using custom pretrained checkpoint.", +) +parser.add_argument( + "--tokenizer_model", + default=None, + type=str, + help="Path to pretrained tokenizer model, only used if --tokenizer is sentencepiece", +) +parser.add_argument( + "--vocab_file", default=None, help="Path to the vocab file. Required for pretrained Megatron models" +) +parser.add_argument( + "--do_lower_case", + action='store_true', + help="Whether to lower case the input text. True for uncased models, False for cased models. " + + "Only applicable when tokenizer is build with vocab file", +) parser.add_argument("--none_label", default='O', type=str) parser.add_argument( "--queries", @@ -50,47 +77,50 @@ help="Whether to take predicted label in brackets or \ just append to word in the output", ) -parser.add_argument("--work_dir", default='output/checkpoints', type=str) +parser.add_argument("--checkpoint_dir", default='output/checkpoints', type=str) parser.add_argument("--labels_dict", default='label_ids.csv', type=str) -parser.add_argument("--amp_opt_level", default="O0", type=str, choices=["O0", "O1", "O2"]) args = parser.parse_args() -print(args) +logging.info(args) -if not os.path.exists(args.work_dir): - raise ValueError(f'Work directory not found at {args.work_dir}') +if not os.path.exists(args.checkpoint_dir): + raise ValueError(f'Checkpoint directory not found at {args.checkpoint_dir}') if not os.path.exists(args.labels_dict): raise ValueError(f'Dictionary with ids to labels not found at {args.labels_dict}') -nf = nemo.core.NeuralModuleFactory( - backend=nemo.core.Backend.PyTorch, optimization_level=args.amp_opt_level, log_dir=None -) +nf = nemo.core.NeuralModuleFactory(backend=nemo.core.Backend.PyTorch, log_dir=None) labels_dict = get_vocab(args.labels_dict) -""" Load the pretrained BERT parameters -See the list of pretrained models, call: -nemo_nlp.huggingface.BERT.list_pretrained_models() -""" -pretrained_bert_model = nemo_nlp.nm.trainables.huggingface.BERT(pretrained_model_name=args.pretrained_bert_model) -hidden_size = pretrained_bert_model.hidden_size -tokenizer = NemoBertTokenizer(args.pretrained_bert_model) +model = nemo_nlp.nm.trainables.get_pretrained_lm_model( + pretrained_model_name=args.pretrained_model_name, config=args.bert_config, vocab=args.vocab_file +) + +tokenizer = nemo.collections.nlp.data.tokenizers.get_tokenizer( + tokenizer_name=args.tokenizer, + pretrained_model_name=args.pretrained_model_name, + tokenizer_model=args.tokenizer_model, + vocab_file=args.vocab_file, + do_lower_case=args.do_lower_case, +) +hidden_size = model.hidden_size + data_layer = 
nemo_nlp.nm.data_layers.BertTokenClassificationInferDataLayer( queries=args.queries, tokenizer=tokenizer, max_seq_length=args.max_seq_length, batch_size=1 ) -classifier = TokenClassifier(hidden_size=hidden_size, num_classes=len(labels_dict), dropout=args.fc_dropout) +classifier = TokenClassifier(hidden_size=hidden_size, num_classes=len(labels_dict)) input_ids, input_type_ids, input_mask, _, subtokens_mask = data_layer() -hidden_states = pretrained_bert_model(input_ids=input_ids, token_type_ids=input_type_ids, attention_mask=input_mask) +hidden_states = model(input_ids=input_ids, token_type_ids=input_type_ids, attention_mask=input_mask) logits = classifier(hidden_states=hidden_states) ########################################################################### # Instantiate an optimizer to perform `infer` action -evaluated_tensors = nf.infer(tensors=[logits, subtokens_mask], checkpoint_dir=args.work_dir) +evaluated_tensors = nf.infer(tensors=[logits, subtokens_mask], checkpoint_dir=args.checkpoint_dir) def concatenate(lists): diff --git a/examples/speaker_recognition/configs/quartznet_spkr_3x1x512_xvector.yaml b/examples/speaker_recognition/configs/quartznet_spkr_3x1x512_xvector.yaml new file mode 100644 index 000000000000..868b9a0f01ff --- /dev/null +++ b/examples/speaker_recognition/configs/quartznet_spkr_3x1x512_xvector.yaml @@ -0,0 +1,81 @@ +model: "GramVoxNet" +sample_rate: &sample_rate 16000 +dropout: &drop 0.5 +repeat: &rep 1 +time_length: 8 +n_filters: &n_filters 512 + +AudioToSpeechLabelDataLayer: + sample_rate: *sample_rate + train: + min_duration: 0.1 + shuffle: true + eval: + min_duration: 0.01 + shuffle: false + +AudioToMelSpectrogramPreprocessor: + normalize: "per_feature" + window_size: 0.02 + window_stride: 0.01 + window: "hann" + features: &n_mels 64 + n_fft: 512 + frame_splicing: 1 + dither: 0.00001 + stft_conv: false + +JasperEncoder: + feat_in: *n_mels + activation: "relu" + + jasper: + - filters: *n_filters + repeat: 1 + kernel: [3] + stride: [1] + dilation: [1] + dropout: *drop + residual: true + seperable: true + + - filters: *n_filters + repeat: *rep + kernel: [5] + stride: [1] + dilation: [1] + dropout: *drop + residual: true + seperable: true + + - filters: *n_filters + repeat: *rep + kernel: [7] + stride: [1] + dilation: [1] + dropout: *drop + residual: true + seperable: true + + - filters: *n_filters + repeat: *rep + kernel: [9] + stride: [1] + dilation: [1] + dropout: *drop + residual: true + seperable: true + + - filters: &enc_feat_out 1500 + repeat: 1 + kernel: [1] + stride: [1] + dilation: [1] + dropout: 0.0 + residual: false + seperable: true + +JasperDecoderForSpkrClass: + feat_in: *enc_feat_out + pool_mode: 'xvector' + emb_sizes: 1024,1024 diff --git a/examples/speaker_recognition/configs/quartznet_spkr_3x2x512_xvector.yaml b/examples/speaker_recognition/configs/quartznet_spkr_3x2x512_xvector.yaml new file mode 100644 index 000000000000..ebe0f8a400a7 --- /dev/null +++ b/examples/speaker_recognition/configs/quartznet_spkr_3x2x512_xvector.yaml @@ -0,0 +1,81 @@ +model: "GramVoxNet" +sample_rate: &sample_rate 16000 +dropout: &drop 0.5 +repeat: &rep 2 +time_length: 8 +n_filters: &n_filters 512 + +AudioToSpeechLabelDataLayer: + sample_rate: *sample_rate + train: + min_duration: 0.1 + shuffle: true + eval: + min_duration: 0.01 + shuffle: false + +AudioToMelSpectrogramPreprocessor: + normalize: "per_feature" + window_size: 0.02 + window_stride: 0.01 + window: "hann" + features: &n_mels 64 + n_fft: 512 + frame_splicing: 1 + dither: 0.00001 + stft_conv: false + 
+JasperEncoder: + feat_in: *n_mels + activation: "relu" + + jasper: + - filters: *n_filters + repeat: 1 + kernel: [3] + stride: [1] + dilation: [1] + dropout: *drop + residual: true + seperable: true + + - filters: *n_filters + repeat: *rep + kernel: [5] + stride: [1] + dilation: [1] + dropout: *drop + residual: true + seperable: true + + - filters: *n_filters + repeat: *rep + kernel: [7] + stride: [1] + dilation: [1] + dropout: *drop + residual: true + seperable: true + + - filters: *n_filters + repeat: *rep + kernel: [9] + stride: [1] + dilation: [1] + dropout: *drop + residual: true + seperable: true + + - filters: &enc_feat_out 1500 + repeat: 1 + kernel: [1] + stride: [1] + dilation: [1] + dropout: 0.0 + residual: false + seperable: true + +JasperDecoderForSpkrClass: + feat_in: *enc_feat_out + pool_mode: 'xvector' + emb_sizes: 1024,1024 diff --git a/examples/speaker_recognition/configs/quartznet_spkr_5x1x512_xvector.yaml b/examples/speaker_recognition/configs/quartznet_spkr_5x1x512_xvector.yaml new file mode 100644 index 000000000000..aa3855ac1f1a --- /dev/null +++ b/examples/speaker_recognition/configs/quartznet_spkr_5x1x512_xvector.yaml @@ -0,0 +1,99 @@ +model: "GramVoxNet" +sample_rate: &sample_rate 16000 +dropout: &drop 0.5 +repeat: &rep 1 +time_length: 8 +n_filters: &n_filters 512 + +AudioToSpeechLabelDataLayer: + sample_rate: *sample_rate + train: + min_duration: 0.1 + shuffle: true + eval: + min_duration: 0.01 + shuffle: false + +AudioToMelSpectrogramPreprocessor: + normalize: "per_feature" + window_size: 0.02 + window_stride: 0.01 + window: "hann" + features: &n_mels 64 + n_fft: 512 + frame_splicing: 1 + dither: 0.00001 + stft_conv: false + +JasperEncoder: + feat_in: *n_mels + activation: "relu" + + jasper: + - filters: *n_filters + repeat: 1 + kernel: [3] + stride: [1] + dilation: [1] + dropout: *drop + residual: true + seperable: true + + - filters: *n_filters + repeat: *rep + kernel: [3] + stride: [1] + dilation: [1] + dropout: *drop + residual: true + seperable: true + + - filters: *n_filters + repeat: *rep + kernel: [5] + stride: [1] + dilation: [1] + dropout: *drop + residual: true + seperable: true + + - filters: *n_filters + repeat: *rep + kernel: [5] + stride: [1] + dilation: [1] + dropout: *drop + residual: true + seperable: true + + - filters: *n_filters + repeat: *rep + kernel: [7] + stride: [1] + dilation: [1] + dropout: *drop + residual: true + seperable: true + + - filters: *n_filters + repeat: *rep + kernel: [9] + stride: [1] + dilation: [1] + dropout: *drop + residual: true + seperable: true + + - filters: &enc_feat_out 1500 + repeat: 1 + kernel: [1] + stride: [1] + dilation: [1] + dropout: 0.0 + residual: false + seperable: true + +JasperDecoderForSpkrClass: + feat_in: *enc_feat_out + pool_mode: xvector + emb_sizes: 1024,1024 diff --git a/examples/speaker_recognition/hi-mia_eval.py b/examples/speaker_recognition/hi-mia_eval.py new file mode 100644 index 000000000000..9e35e2d64d05 --- /dev/null +++ b/examples/speaker_recognition/hi-mia_eval.py @@ -0,0 +1,126 @@ +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import os + +import numpy as np +from scipy.interpolate import interp1d +from scipy.optimize import brentq +from sklearn.metrics import roc_curve +from tqdm import tqdm + + +""" +This script faciliates to get EER % based on cosine-smilarity +for HI-MIA dataset. + +Args: + data_root str: Path to embeddings file and also make sure trails_1m file is also + placed in this path + emb : test embedding file path + emb_labels : embedding labels file path + emb_size :help="Embeddings size +""" + + +def get_acc(data_root='./myExps/hi-mia/', emb='', emb_labels='', emb_size=512): + basename = os.path.dirname(emb) + X_test = np.load(emb) + label_files = np.load(emb_labels) + + assert len(X_test) == len(label_files) + trail_file = root + 'trials_1m' + + test_list = {} + speaker_list = {} + + for idx, line in enumerate(label_files): + line = line.strip() + speaker = line.split('.')[0].split('_')[0] + test_list[line] = idx + + if speaker in speaker_list: + speaker_list[speaker].append(idx) + else: + speaker_list[speaker] = [idx] + + emb = int(emb_size) + + tmp_file = open(trail_file, 'r').readlines() + trail_score = open('trial_score.txt', 'w') + + trial_embs = [] + keys = [] + all_scores = [] + all_keys = [] + + # for each of trails in trial file + for line in tqdm(tmp_file): + line = line.strip() + x_speaker = line.split(' ')[0] + y_speaker = line.split(' ')[1] + + X = np.zeros(emb,) + for idx in speaker_list[x_speaker]: + X = X + X_test[idx] + + X = X / len(speaker_list[x_speaker]) + + if x_speaker not in keys: + keys.append(x_speaker) + trial_embs.extend([X]) + + Y = np.zeros(emb,) + for idx in speaker_list[y_speaker]: + Y = Y + X_test[idx] + + Y = Y / len(speaker_list[y_speaker]) + + if y_speaker not in keys: + keys.append(y_speaker) + trial_embs.extend([Y]) + + # X=Y + score = (X @ Y.T) / (((X @ X.T) * (Y @ Y.T)) ** 0.5) + score = (score + 1) / 2 + + all_scores.append(score) + truth = 0 if line.split(' ')[-1] == 'nontarget' else 1 + + all_keys.append(truth) + + trail_score.write(str(score) + "\t" + line.split(' ')[-1]) + trail_score.write('\n') + + np.save(basename + '/all_embs_himia.npy', np.asarray(trial_embs)) + np.save(basename + '/all_ids_himia.npy', np.asarray(keys)) + + return np.asarray(all_scores), np.asarray(all_keys) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--data_root", help="directory of embeddings location", type=str, required=True) + parser.add_argument("--emb", help="test embedding file path", type=str, required=True) + parser.add_argument("--emb_labels", help="embedding labels file path", type=str, required=True) + parser.add_argument("--emb_size", help="Embeddings size", type=int, required=True) + args = parser.parse_args() + root, emb, emb_labels, emb_size = args.data_root, args.emb, args.emb_labels, args.emb_size + + y_score, y = get_acc(data_root=root, emb=emb, emb_labels=emb_labels, emb_size=emb_size) + fpr, tpr, thresholds = roc_curve(y, y_score, pos_label=1) + + eer = brentq(lambda x: 1.0 - x - interp1d(fpr, tpr)(x), 0.0, 1.0) + print("EER: {:.2f}%".format(eer * 100)) diff --git a/examples/speaker_recognition/kaldi_plda.py b/examples/speaker_recognition/kaldi_plda.py new file mode 100644 index 000000000000..92e1f397801a --- /dev/null +++ b/examples/speaker_recognition/kaldi_plda.py @@ -0,0 +1,56 @@ +# Copyright 2020 NVIDIA. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import os +import subprocess + +import numpy as np +from kaldi_python_io import ArchiveWriter + + +def write_scp(root, filename, lines, train): + assert len(lines) == len(train) + filename = os.path.join(root, filename) + with ArchiveWriter(filename + '.ark', filename + '.scp') as writer: + for key, mat in zip(lines, train): + writer.write(key, mat) + print("wrote {}.ark".format(filename)) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--root", help="embeddings root path", type=str, required=True) + parser.add_argument("--train_embs", help="npy of train embs for PLDA training", type=str, required=True) + parser.add_argument("--train_labels", help="npy of train labels for PLDA training", type=str, required=True) + parser.add_argument("--eval_embs", help="npy of eval embb for PLDA testing", type=str, required=True) + parser.add_argument("--eval_labels", help="npy of eval labels for PLDA testing", type=str, required=True) + parser.add_argument("--stage", help="1 for test on already trained PLDA 2 otherwise", type=str, required=True) + args = parser.parse_args() + + root = args.root + + if int(args.stage) < 2: + train = np.load(args.train_embs) + labels = np.load(args.train_labels) + + write_scp(root, 'train', labels, train) + + eval = np.load(args.eval_embs) + labels = np.load(args.eval_labels) + + write_scp(root, 'dev', labels, eval) + + cmd = ['bash', 'train_plda.sh', root, args.stage] + subprocess.run(cmd) diff --git a/examples/speaker_recognition/notebooks/Speaker_Recognition_an4.ipynb b/examples/speaker_recognition/notebooks/Speaker_Recognition_an4.ipynb new file mode 100644 index 000000000000..219ba9e9dde6 --- /dev/null +++ b/examples/speaker_recognition/notebooks/Speaker_Recognition_an4.ipynb @@ -0,0 +1,748 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "colab_type": "code", + "id": "kUlQMiPZxfS_", + "outputId": "cee17d53-c44c-4821-ebeb-4fa347c316b2" + }, + "outputs": [], + "source": [ + "\"\"\"\n", + "You can run either this notebook locally (if you have all the dependencies and a GPU) or on Google Colab.\n", + "\n", + "Instructions for setting up Colab are as follows:\n", + "1. Open a new Python 3 notebook.\n", + "2. Import this notebook from GitHub (File -> Upload Notebook -> \"GITHUB\" tab -> copy/paste GitHub URL)\n", + "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", + "4. 
Run this cell to set up dependencies.\n", + "\"\"\"\n", + "import os\n", + "# If you're using Google Colab and not running locally, run this cell.\n", + "!pip install wget\n", + "!apt-get install sox\n", + "# !pip install nemo_toolkit[asr]==0.10.0b10\n", + "!git clone https://github.com/NVIDIA/NeMo.git\n", + "os.chdir('NeMo')\n", + "!bash reinstall.sh\n", + "!pip install unidecode" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "VgTR8CMlxu3p" + }, + "source": [ + "# **SPEAKER RECOGNITION** \n", + "\n", + "Speaker Recognition (SR) is a broad research area which solves two major tasks: speaker identification (who is speaking?) and\n", + "speaker verification (is the speaker who they claim to be?). In this work, we focus on far-field,\n", + "text-independent speaker recognition when the identity of the speaker is based on how the speech is spoken,\n", + "not necessarily in what is being said. Typically such SR systems operate on unconstrained speech utterances,\n", + "which are converted into vectors of fixed length, called speaker embeddings. Speaker embeddings are also used in\n", + "automatic speech recognition (ASR) and speech synthesis.\n", + "\n", + "As the goal of most speaker related systems is to get good speaker level embeddings that could help distinguish from\n", + "other speakers, we shall first train these embeddings in end-to-end\n", + "manner optimizing the [QuatzNet](https://arxiv.org/abs/1910.10261) based encoder model on cross-entropy loss.\n", + "We modify the decoder to get these fixed size embeddings irrespective of the length of ithe nput audio. We employ a mean and variance\n", + "based statistics pooling method to grab these embeddings." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "KzzOC5rpx9y6" + }, + "source": [ + "In this tutorial, we shall first train these embeddings on speaker related datasets, and then get speaker embeddings from a pretrained network for a new dataset. Since Google Colab has very slow read-write speeds, I'll be demonstarting this tutorial using [an4](http://www.speech.cs.cmu.edu/databases/an4/). \n", + "\n", + "Instead if you'd like to try on a bigger dataset like [hi-mia](https://arxiv.org/abs/1912.01231) use the [get_hi-mia-data.py](https://github.com/NVIDIA/NeMo/blob/master/scripts/get_hi-mia_data.py) script to download the necessary files, extract them, also re-sample to 16Khz if any of these samples are not at 16Khz. We do also provide scripts to score these embeddings for a speaker-verification task like hi-mia dataset. To do that follow this detailed [tutorial](https://nvidia.github.io/NeMo/) or [notebook](https://github.com/NVIDIA/NeMo/blob/master/examples/speaker_recognition/notebooks/Speaker_Recognition_hi-mia.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 119 + }, + "colab_type": "code", + "id": "UO_hAhMx0rwv", + "outputId": "493bd23a-d07a-46db-e634-d38a09f70ef3" + }, + "outputs": [], + "source": [ + "print(os.getcwd())\n", + "data_dir = 'data'\n", + "!mkdir $data_dir\n", + "import glob\n", + "import subprocess\n", + "import tarfile\n", + "import wget\n", + "\n", + "# Download the dataset. 
This will take a few moments...\n", + "print(\"******\")\n", + "if not os.path.exists(data_dir + '/an4_sphere.tar.gz'):\n", + " an4_url = 'http://www.speech.cs.cmu.edu/databases/an4/an4_sphere.tar.gz'\n", + " an4_path = wget.download(an4_url, data_dir)\n", + " print(f\"Dataset downloaded at: {an4_path}\")\n", + "else:\n", + " print(\"Tarfile already exists.\")\n", + " an4_path = data_dir + '/an4_sphere.tar.gz'\n", + "\n", + "# Untar and convert .sph to .wav (using sox)\n", + "tar = tarfile.open(an4_path)\n", + "tar.extractall(path=data_dir)\n", + "\n", + "print(\"Converting .sph to .wav...\")\n", + "sph_list = glob.glob(data_dir + '/an4/**/*.sph', recursive=True)\n", + "for sph_path in sph_list:\n", + " wav_path = sph_path[:-4] + '.wav'\n", + " cmd = [\"sox\", sph_path, wav_path]\n", + " subprocess.run(cmd)\n", + "print(\"Finished conversion.\\n******\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "LEKDkOSimsKr" + }, + "source": [ + "Since an4 is not designed for speaker recognition, this facilitates the oppurtunity to demostrate how you can generate manifest files that are necessary for training. These methods can be applied to any dataset to get similar training manifest files. \n", + "\n", + "First get a scp file(s) which has all the wav files with absolute paths for each of train, dev, and test set. This can be easily done by the `find` bash command" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "0e6nuOFN8Pfv", + "scrolled": true + }, + "outputs": [], + "source": [ + "!find $PWD/data/an4/wav/an4_clstk -iname \"*.wav\" > data/an4/wav/an4_clstk/train_all.scp" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "7168Z9eXn4st" + }, + "source": [ + "Let's look at the first 3 lines of scp file for train. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 68 + }, + "colab_type": "code", + "id": "SQupCVpZIvtL", + "outputId": "e45cf645-42fc-4f4f-bd94-964848e04145" + }, + "outputs": [], + "source": [ + "!head -n 3 $data_dir/an4/wav/an4_clstk/train_all.scp" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "cN09z0XFoDjN" + }, + "source": [ + "Since we created the scp file for train, we use `scp_to_manifest.py` to convert this scp file to a manifest file and then optionally split the files to train \\& dev for evaluating the models while training by using the `--split` flag. We wouldn't be needing the `--split` option for test folder. \n", + "Accordingly please mention the `id` number, which is the field num seperated by `/` to be considered as the speaker label " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 85 + }, + "colab_type": "code", + "id": "fNXZwNexIkAo", + "outputId": "ca06c4be-c0f6-4ec7-8198-a26347ea4b1e" + }, + "outputs": [], + "source": [ + "!python scripts/scp_to_manifest.py --scp $data_dir/an4/wav/an4_clstk/train_all.scp --id -2 --out $data_dir/an4/wav/an4_clstk/all_manifest.json --split" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "dxUL_g77oned" + }, + "source": [ + "Generate the scp for the test folder and then convert it to a manifest. 
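For readers who want to see what the scp-to-manifest conversion amounts to without running the script, here is a rough, hypothetical sketch (it is not the actual scripts/scp_to_manifest.py and omits the --split handling); the --id -2 argument corresponds to taking the second-to-last /-separated field of each path as the speaker label, and the use of soundfile for durations is an assumption made only for this example.

# Hypothetical sketch, not the actual scripts/scp_to_manifest.py.
import json
import soundfile as sf  # assumed here only to read durations

def scp_to_manifest(scp_file, out_file, spk_id_field=-2):
    with open(scp_file) as scp, open(out_file, 'w') as out:
        for wav_path in (line.strip() for line in scp if line.strip()):
            entry = {
                "audio_filepath": wav_path,
                "duration": sf.info(wav_path).duration,
                # e.g. .../an4_clstk/<speaker>/<utterance>.wav -> "<speaker>"
                "label": wav_path.split('/')[spk_id_field],
            }
            out.write(json.dumps(entry) + "\n")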
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "id": "QShlVwEIO64D", + "outputId": "291d7dce-e202-4062-9eee-e43224084cb5" + }, + "outputs": [], + "source": [ + "!find $PWD/data/an4/wav/an4test_clstk -iname \"*.wav\" > data/an4/wav/an4test_clstk/test_all.scp\n", + "!python scripts/scp_to_manifest.py --scp data/an4/wav/an4test_clstk/test_all.scp --id -2 --out data/an4/wav/an4test_clstk/test.json" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "F4rBMntjpPph" + }, + "source": [ + "Import necessary packages" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 187 + }, + "colab_type": "code", + "id": "4mSWNvdZPIwR", + "outputId": "83455882-4924-4d18-afd3-d2c8ee8ed78d" + }, + "outputs": [], + "source": [ + "from ruamel.yaml import YAML\n", + "\n", + "import nemo\n", + "import nemo.collections.asr as nemo_asr\n", + "import copy\n", + "from functools import partial" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "CeKfJQ-YpTOv" + }, + "source": [ + "# Building Training and Evaluation DAGs with NeMo\n", + "Building a model using NeMo consists of \n", + "\n", + "1. Instantiating the neural modules we need\n", + "2. specifying the DAG by linking them together.\n", + "\n", + "In NeMo, the training and inference pipelines are managed by a NeuralModuleFactory, which takes care of checkpointing, callbacks, and logs, along with other details in training and inference. We set its log_dir argument to specify where our model logs and outputs will be written, and can set other training and inference settings in its constructor. For instance, if we were resuming training from a checkpoint, we would set the argument `checkpoint_dir=`.\n", + "\n", + "Along with logs in NeMo, you can optionally view the tensorboard logs with the create_tb_writer=True argument to the NeuralModuleFactory. By default all the tensorboard log files will be stored in {log_dir}/tensorboard, but you can change this with the tensorboard_dir argument. One can load tensorboard logs through tensorboard by running `tensorboard --logdir=` in the terminal." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "uyn2xrR7R1K_" + }, + "outputs": [], + "source": [ + "exp_name = 'quartznet3x1_an4'\n", + "work_dir = './myExps/'\n", + "neural_factory = nemo.core.NeuralModuleFactory(\n", + " log_dir=work_dir+\"/as4_logdir/\",\n", + " checkpoint_dir=\"./myExps/checkpoints/\" + exp_name,\n", + " create_tb_writer=True,\n", + " random_seed=42,\n", + " tensorboard_dir=work_dir+'/tensorboard/',\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "k-juqc40p8KN" + }, + "source": [ + "Now that we have our neural module factory, we can specify our **neural modules and instantiate them**. Here, we load the parameters for each module from the configuration file. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "id": "mC-KPOy-rpLA", + "outputId": "1d902505-6e35-4eb8-aebf-8401c1bdd39c" + }, + "outputs": [], + "source": [ + "from nemo.utils import logging\n", + "yaml = YAML(typ=\"safe\")\n", + "with open('../configs/quartznet_spkr_3x1x512_xvector.yaml') as f:\n", + " spkr_params = yaml.load(f)\n", + "\n", + "sample_rate = spkr_params[\"sample_rate\"]\n", + "time_length = spkr_params.get(\"time_length\", 8)\n", + "logging.info(\"max time length considered for each file is {} sec\".format(time_length))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "5VgzNS1lrrqS" + }, + "source": [ + "Instantiate the train data_layer using config arguments. `labels = None` automatically creates output labels from the manifest files, if you would like to pass those speaker names you can use the labels option. So while instantiating eval data_layer, we can pass labels to the class in order to match same the speaker output labels as we have in the training data layer. This comes in handy while training on multiple datasets with more than one manifest file. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 153 + }, + "colab_type": "code", + "id": "dC9QOenNPoUs", + "outputId": "786aac99-57f6-4066-e9a3-4908dc6e3d7a" + }, + "outputs": [], + "source": [ + "train_dl_params = copy.deepcopy(spkr_params[\"AudioToSpeechLabelDataLayer\"])\n", + "train_dl_params.update(spkr_params[\"AudioToSpeechLabelDataLayer\"][\"train\"])\n", + "del train_dl_params[\"train\"]\n", + "del train_dl_params[\"eval\"]\n", + "\n", + "batch_size=64\n", + "data_layer_train = nemo_asr.AudioToSpeechLabelDataLayer(\n", + " manifest_filepath='../data/an4/wav/an4_clstk/train.json',\n", + " labels=None,\n", + " batch_size=batch_size,\n", + " time_length=time_length,\n", + " **train_dl_params,\n", + " )\n", + "\n", + "eval_dl_params = copy.deepcopy(spkr_params[\"AudioToSpeechLabelDataLayer\"])\n", + "eval_dl_params.update(spkr_params[\"AudioToSpeechLabelDataLayer\"][\"eval\"])\n", + "del eval_dl_params[\"train\"]\n", + "del eval_dl_params[\"eval\"]\n", + "\n", + "data_layer_eval = nemo_asr.AudioToSpeechLabelDataLayer(\n", + " manifest_filepath=\"../data/an4/wav/an4_clstk/dev.json\",\n", + " labels=data_layer_train.labels,\n", + " batch_size=batch_size,\n", + " time_length=time_length,\n", + " **eval_dl_params,\n", + ")\n", + "\n", + "data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor(\n", + " sample_rate=sample_rate, **spkr_params[\"AudioToMelSpectrogramPreprocessor\"],\n", + " )\n", + "encoder = nemo_asr.JasperEncoder(**spkr_params[\"JasperEncoder\"],)\n", + "\n", + "decoder = nemo_asr.JasperDecoderForSpkrClass(\n", + " feat_in=spkr_params[\"JasperEncoder\"][\"jasper\"][-1][\"filters\"],\n", + " num_classes=data_layer_train.num_classes,\n", + " pool_mode=spkr_params[\"JasperDecoderForSpkrClass\"]['pool_mode'],\n", + " emb_sizes=spkr_params[\"JasperDecoderForSpkrClass\"][\"emb_sizes\"].split(\",\"),\n", + " )\n", + "\n", + "xent_loss = nemo_asr.CrossEntropyLossNM(weight=None)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "9bAP70DqsXGY" + }, + "source": [ + "The next step is to assemble our training DAG by specifying the inputs to each neural module." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 224 + }, + "colab_type": "code", + "id": "1raBGmd5Vshl", + "outputId": "33a128f4-193f-4913-9c82-fd27610dfb9a" + }, + "outputs": [], + "source": [ + "audio_signal, audio_signal_len, label, label_len = data_layer_train()\n", + "processed_signal, processed_signal_len = data_preprocessor(input_signal=audio_signal, length=audio_signal_len)\n", + "encoded, encoded_len = encoder(audio_signal=processed_signal, length=processed_signal_len)\n", + "logits, _ = decoder(encoder_output=encoded)\n", + "loss = xent_loss(logits=logits, labels=label)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "uwnZT8ycsYMa" + }, + "source": [ + "We would like to be able to evaluate our model on the dev set, as well, so let's set up the evaluation DAG.\n", + "\n", + "Our evaluation DAG will reuse most of the parts of the training DAG with the exception of the data layer, since we are loading the evaluation data from a different file but evaluating on the same model. Note that if we were using data augmentation in training, we would also leave that out in the evaluation DAG." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 224 + }, + "colab_type": "code", + "id": "sPPyiNtLWDyf", + "outputId": "3c37a7dd-b85c-4d29-edfe-cfd0cd16abe4" + }, + "outputs": [], + "source": [ + "audio_signal_test, audio_len_test, label_test, _ = data_layer_eval()\n", + "processed_signal_test, processed_len_test = data_preprocessor(\n", + " input_signal=audio_signal_test, length=audio_len_test\n", + " )\n", + "encoded_test, encoded_len_test = encoder(audio_signal=processed_signal_test, length=processed_len_test)\n", + "logits_test, _ = decoder(encoder_output=encoded_test)\n", + "loss_test = xent_loss(logits=logits_test, labels=label_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "8m7dz1-usp1S" + }, + "source": [ + "# Creating CallBacks\n", + "\n", + "We would like to be able to monitor our model while it's training, so we use callbacks. In general, callbacks are functions that are called at specific intervals over the course of training or inference, such as at the start or end of every n iterations, epochs, etc. The callbacks we'll be using for this are the SimpleLossLoggerCallback, which reports the training loss (or another metric of your choosing, such as \\% accuracy for speaker recognition tasks), and the EvaluatorCallback, which regularly evaluates the model on the dev set. Both of these callbacks require you to pass in the tensors to be evaluated--these would be the final outputs of the training and eval DAGs above.\n", + "\n", + "Another useful callback is the CheckpointCallback, for saving checkpoints at set intervals. We create one here just to demonstrate how it works." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "LFlXnbRaWTVl" + }, + "outputs": [], + "source": [ + "from nemo.collections.asr.helpers import (\n", + " monitor_classification_training_progress,\n", + " process_classification_evaluation_batch,\n", + " process_classification_evaluation_epoch,\n", + ")\n", + "from nemo.utils.lr_policies import CosineAnnealing\n", + "\n", + "train_callback = nemo.core.SimpleLossLoggerCallback(\n", + " tensors=[loss, logits, label],\n", + " print_func=partial(monitor_classification_training_progress, eval_metric=[1]),\n", + " step_freq=40,\n", + " get_tb_values=lambda x: [(\"train_loss\", x[0])],\n", + " tb_writer=neural_factory.tb_writer,\n", + " )\n", + "\n", + "callbacks = [train_callback]\n", + "\n", + "chpt_callback = nemo.core.CheckpointCallback(\n", + " folder=\"./myExps/checkpoints/\" + exp_name,\n", + " load_from_folder=\"./myExps/checkpoints/\" + exp_name,\n", + " step_freq=100,\n", + " )\n", + "callbacks.append(chpt_callback)\n", + "\n", + "tagname = \"an4_dev\"\n", + "eval_callback = nemo.core.EvaluatorCallback(\n", + " eval_tensors=[loss_test, logits_test, label_test],\n", + " user_iter_callback=partial(process_classification_evaluation_batch, top_k=1),\n", + " user_epochs_done_callback=partial(process_classification_evaluation_epoch, tag=tagname),\n", + " eval_step=100, # How often we evaluate the model on the test set\n", + " tb_writer=neural_factory.tb_writer,\n", + " )\n", + "\n", + "callbacks.append(eval_callback)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "a8EFjLsWs_jM" + }, + "source": [ + "Now that we have our model and callbacks set up, how do we run it?\n", + "\n", + "Once we create our neural factory and the callbacks for the information that we want to see, we can start training by simply calling the train function on the tensors we want to optimize and our callbacks! Since this notebook is for you to get started and since the an4 as dataset is small, it would quickly get higher accuracies. For better models use bigger datasets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "colab_type": "code", + "id": "xHTEtz7yXVMK", + "outputId": "bd53ae06-cd0d-4291-da66-1af3079cbd86" + }, + "outputs": [], + "source": [ + "# train model\n", + "num_epochs=100\n", + "N = len(data_layer_train)\n", + "steps_per_epoch = N // batch_size\n", + "\n", + "logging.info(\"Number of steps per epoch {}\".format(steps_per_epoch))\n", + "\n", + "neural_factory.train(\n", + " tensors_to_optimize=[loss],\n", + " callbacks=callbacks,\n", + " lr_policy=CosineAnnealing(\n", + " num_epochs * steps_per_epoch, warmup_steps=0.1 * num_epochs * steps_per_epoch,\n", + " ),\n", + " optimizer=\"novograd\",\n", + " optimization_params={\n", + " \"num_epochs\": num_epochs,\n", + " \"lr\": 0.01,\n", + " \"betas\": (0.95, 0.5),\n", + " \"weight_decay\": 0.001,\n", + " \"grad_norm_clip\": None,\n", + " }\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "BB6s19pmxGfX" + }, + "source": [ + "Now that we trained our embeddings, we shall extract these embeddings using our pretrained checkpoint present at `checkpoint_dir`. As we can see from the neural architecture, we extract the embeddings after the `emb1` layer. 
\n", + "![Speaker Recognition Layers](./speaker_reco.jpg)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "oSIDu6jkym66" + }, + "source": [ + "Now use the test manifest to get the embeddings. As we saw before, let's create a new `data_layer` for test. Use the previously instantiated models and attach the DAGs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 258 + }, + "colab_type": "code", + "id": "5JqUVbKDY32a", + "outputId": "dd835e02-8882-4287-9639-c249ac3dfc94" + }, + "outputs": [], + "source": [ + "eval_dl_params = copy.deepcopy(spkr_params[\"AudioToSpeechLabelDataLayer\"])\n", + "eval_dl_params.update(spkr_params[\"AudioToSpeechLabelDataLayer\"][\"eval\"])\n", + "del eval_dl_params[\"train\"]\n", + "del eval_dl_params[\"eval\"]\n", + "eval_dl_params['shuffle'] = False # To grab the file names without changing data_layer\n", + "\n", + "test_dataset = '../data/an4/wav/an4test_clstk/test.json'\n", + "data_layer_test = nemo_asr.AudioToSpeechLabelDataLayer(\n", + " manifest_filepath=test_dataset,\n", + " labels=None,\n", + " batch_size=batch_size,\n", + " **eval_dl_params,\n", + " )\n", + "\n", + "audio_signal_test, audio_len_test, label_test, _ = data_layer_test()\n", + "processed_signal_test, processed_len_test = data_preprocessor(\n", + " input_signal=audio_signal_test, length=audio_len_test)\n", + "encoded_test, _ = encoder(audio_signal=processed_signal_test, length=processed_len_test)\n", + "_, embeddings = decoder(encoder_output=encoded_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "dwEifkD9zfpl" + }, + "source": [ + "Now get the embeddings using the `neural_factory.infer` command. It does a forward pass of all our modules and saves our embeddings in `/embeddings`."
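Once the cell below has written the embedding and label arrays to ./myExps/embeddings/, the vectors can already be compared directly. A hypothetical follow-up (not part of the original notebook) scoring two test utterances with cosine similarity, rescaled from [-1, 1] to [0, 1] exactly as hi-mia_eval.py does, could look like this; the .npy file names follow from the save code in the next cell.

# Hypothetical follow-up, not part of the original notebook.
import numpy as np

embs = np.load('./myExps/embeddings/test.npy')          # shape: (num_utterances, emb_size)
names = np.load('./myExps/embeddings/test_labels.npy')  # wav file names, in the same order

def cosine_score(x, y):
    # Cosine similarity rescaled from [-1, 1] to [0, 1], as in hi-mia_eval.py.
    return ((x @ y) / np.sqrt((x @ x) * (y @ y)) + 1) / 2

print(names[0], names[1], cosine_score(embs[0], embs[1]))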
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 153 + }, + "colab_type": "code", + "id": "wGxYiFpJze5h", + "outputId": "dbbc7204-28bc-43e9-b6aa-f3f757f5d4b5" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import json\n", + "eval_tensors = neural_factory.infer(tensors=[embeddings, label_test], checkpoint_dir=\"./myExps/checkpoints/\" + exp_name)\n", + " # inf_loss , inf_emb, inf_logits, inf_label = eval_tensors\n", + "inf_emb, inf_label = eval_tensors\n", + "whole_embs = []\n", + "whole_labels = []\n", + "manifest = open(test_dataset, 'r').readlines()\n", + "\n", + "for line in manifest:\n", + " line = line.strip()\n", + " dic = json.loads(line)\n", + " filename = dic['audio_filepath'].split('/')[-1]\n", + " whole_labels.append(filename)\n", + "\n", + "for idx in range(len(inf_label)):\n", + " whole_embs.extend(inf_emb[idx].numpy())\n", + "\n", + "embedding_dir = './myExps/embeddings/'\n", + "if not os.path.exists(embedding_dir):\n", + " os.mkdir(embedding_dir)\n", + "\n", + "filename = os.path.basename(test_dataset).split('.')[0]\n", + "name = embedding_dir + filename\n", + "\n", + "np.save(name + '.npy', np.asarray(whole_embs))\n", + "np.save(name + '_labels.npy', np.asarray(whole_labels))\n", + "logging.info(\"Saved embedding files to {}\".format(embedding_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "id": "SKKVIb7e6vel", + "outputId": "a3fa3703-da6c-4a07-c20c-c83df11a8f25" + }, + "outputs": [], + "source": [ + "ls myExps/embeddings/" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "A_7S4Yja7A8V" + }, + "source": [ + "Since an4 doesn't have trails files to demonstrate cosine and PLDA scoring, a tutorial for that can be found at\n", + "[hi-mia notebook](https://github.com/NVIDIA/NeMo/blob/master/examples/speaker_recognition/notebooks/Speaker_Recognition_an4.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "Speaker_Recognition_dataset.ipynb", + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/examples/speaker_recognition/notebooks/Speaker_Recognition_hi-mia.ipynb b/examples/speaker_recognition/notebooks/Speaker_Recognition_hi-mia.ipynb new file mode 100644 index 000000000000..51b5ef38c8c8 --- /dev/null +++ b/examples/speaker_recognition/notebooks/Speaker_Recognition_hi-mia.ipynb @@ -0,0 +1,769 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "colab_type": "code", + "id": "kUlQMiPZxfS_", + "outputId": "cee17d53-c44c-4821-ebeb-4fa347c316b2" + }, + "outputs": [], + "source": [ + "\"\"\"\n", + "You can run either this notebook locally (if you have all the dependencies and a GPU) or on Google 
Colab.\n", + "\n", + "Instructions for setting up Colab are as follows:\n", + "1. Open a new Python 3 notebook.\n", + "2. Import this notebook from GitHub (File -> Upload Notebook -> \"GITHUB\" tab -> copy/paste GitHub URL)\n", + "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", + "4. Run this cell to set up dependencies.\n", + "\"\"\"\n", + "# If you're using Google Colab and not running locally, run this cell.\n", + "import os\n", + "!pip install wget\n", + "!apt-get install sox\n", + "\n", + "!git clone https://github.com/NVIDIA/NeMo.git\n", + "os.chdir('NeMo')\n", + "!bash reinstall.sh\n", + "\n", + "!pip install unidecode" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "VgTR8CMlxu3p" + }, + "source": [ + "# **SPEAKER RECOGNITION** \n", + "\n", + "Speaker Recognition (SR) is a broad research area which solves two major tasks: speaker identification (who is speaking?) and\n", + "speaker verification (is the speaker who they claim to be?). In this work, we focus on far-field,\n", + "text-independent speaker recognition when the identity of the speaker is based on how the speech is spoken,\n", + "not necessarily in what is being said. Typically such SR systems operate on unconstrained speech utterances,\n", + "which are converted into vectors of fixed length, called speaker embeddings. Speaker embeddings are also used in\n", + "automatic speech recognition (ASR) and speech synthesis.\n", + "\n", + "As the goal of most speaker related systems is to get good speaker level embeddings that could help distinguish from\n", + "other speakers, we shall first train these embeddings in end-to-end\n", + "manner optimizing the [QuatzNet](https://arxiv.org/abs/1910.10261) based encoder model on cross-entropy loss.\n", + "We modify the decoder to get these fixed size embeddings irrespective of the length of ithe nput audio. We employ a mean and variance\n", + "based statistics pooling method to grab these embeddings." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "KzzOC5rpx9y6" + }, + "source": [ + "In this tutorial, we shall first train these embeddings on speaker related datasets and then get speaker embeddings from a pretrained network for a new dataset. Since Google Colab has very slow read-write speeds, Please run this locally for training on [hi-mia](https://arxiv.org/abs/1912.01231). \n", + "\n", + "We use the [get_hi-mia-data.py](https://github.com/NVIDIA/NeMo/blob/master/scripts/get_hi-mia_data.py) script to download the necessary files, extract them, and also re-sample to 16Khz if any of these samples are not at 16Khz. We provide scripts to score these embeddings for a speaker-verification task like hi-mia dataset at the end. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 119 + }, + "colab_type": "code", + "id": "UO_hAhMx0rwv", + "outputId": "493bd23a-d07a-46db-e634-d38a09f70ef3" + }, + "outputs": [], + "source": [ + "data_dir = 'scripts/data/'\n", + "!mkdir $data_dir\n", + "\n", + "# Download and process dataset. 
This will take a few moments...\n", + "!python scripts/get_hi-mia_data.py --data_root=$data_data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "After the download and conversion, your `data` folder should contain directories with manifest files as:\n", + "\n", + "* `data//train.json`\n", + "* `data//dev.json` \n", + "* `data//{set}_all.json` \n", + "\n", + "For each set, we create utt2spk files, these files would be used later in PLDA training.\n", + "\n", + "Each line in the manifest file describes a training sample - `audio_filepath` contains the path to the wav file, `duration` it's duration in seconds, and `label` is the speaker class label:\n", + "\n", + "`{\"audio_filepath\": \"/data/train/SPEECHDATA/wav/SV0184/SV0184_6_04_N3430.wav\", \"duration\": 1.22, \"label\": \"SV0184\"}` \n", + "\n", + "`{\"audio_filepath\": \"/data/train/SPEECHDATA/wav/SV0184/SV0184_5_03_F2037.wav\", duration\": 1.375, \"label\": \"SV0184\"}`\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "F4rBMntjpPph" + }, + "source": [ + "Import necessary packages" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 187 + }, + "colab_type": "code", + "id": "4mSWNvdZPIwR", + "outputId": "83455882-4924-4d18-afd3-d2c8ee8ed78d" + }, + "outputs": [], + "source": [ + "from ruamel.yaml import YAML\n", + "\n", + "import nemo\n", + "import nemo.collections.asr as nemo_asr\n", + "import copy\n", + "from functools import partial" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "CeKfJQ-YpTOv" + }, + "source": [ + "# Building Training and Evaluation DAGs with NeMo\n", + "Building a model using NeMo consists of \n", + "\n", + "1. Instantiating the neural modules we need\n", + "2. specifying the DAG by linking them together.\n", + "\n", + "In NeMo, the training and inference pipelines are managed by a NeuralModuleFactory, which takes care of checkpointing, callbacks, and logs, along with other details in training and inference. We set its log_dir argument to specify where our model logs and outputs will be written, and can set other training and inference settings in its constructor. For instance, if we were resuming training from a checkpoint, we would set the argument `checkpoint_dir=`.\n", + "\n", + "Along with logs in NeMo, you can optionally view the tensorboard logs with the create_tb_writer=True argument to the NeuralModuleFactory. By default all the tensorboard log files will be stored in {log_dir}/tensorboard, but you can change this with the tensorboard_dir argument. One can load tensorboard logs through tensorboard by running `tensorboard --logdir=` in the terminal." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "uyn2xrR7R1K_" + }, + "outputs": [], + "source": [ + "exp_name = 'quartznet3x2_hi-mia'\n", + "work_dir = './myExps/'\n", + "neural_factory = nemo.core.NeuralModuleFactory(\n", + " log_dir=work_dir+\"/hi-mia_logdir/\",\n", + " checkpoint_dir=\"./myExps/checkpoints/\" + exp_name,\n", + " create_tb_writer=True,\n", + " random_seed=42,\n", + " tensorboard_dir=work_dir+'/tensorboard/',\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "k-juqc40p8KN" + }, + "source": [ + "Now that we have our neural module factory, we can specify our **neural modules and instantiate them**. 
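# Side note on the manifest format shown above: each line is a standalone JSON object with
# "audio_filepath", "duration" (seconds) and "label" (speaker id). A minimal sketch of how one
# such line could be produced for a single wav file follows; the example path and the
# "speaker id = parent directory name" convention are illustrative assumptions, not part of the
# hi-mia download script.
import json
import os
import wave

wav_path = 'data/train/SPEECHDATA/wav/SV0184/SV0184_6_04_N3430.wav'  # hypothetical path

with wave.open(wav_path, 'rb') as w:
    duration = w.getnframes() / float(w.getframerate())

entry = {
    'audio_filepath': wav_path,
    'duration': round(duration, 3),
    'label': os.path.basename(os.path.dirname(wav_path)),  # e.g. 'SV0184'
}
print(json.dumps(entry))  # one such line per utterance makes up train.json / dev.json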
Here, we load the parameters for each module from the configuration file. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "id": "mC-KPOy-rpLA", + "outputId": "1d902505-6e35-4eb8-aebf-8401c1bdd39c" + }, + "outputs": [], + "source": [ + "from nemo.utils import logging\n", + "yaml = YAML(typ=\"safe\")\n", + "with open('examples/speaker_recognition/configs/quartznet_spkr_3x2x512_xvector.yaml') as f:\n", + " spkr_params = yaml.load(f)\n", + "\n", + "sample_rate = spkr_params[\"sample_rate\"]\n", + "time_length = spkr_params.get(\"time_length\", 8)\n", + "logging.info(\"max time length considered for each file is {} sec\".format(time_length))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "5VgzNS1lrrqS" + }, + "source": [ + "Instantiate the train data_layer using config arguments. `labels = None` automatically creates output labels from the manifest files, if you would like to pass those speaker names you can use the labels option. So while instantiating eval data_layer, we can pass labels to the class in order to match same the speaker output labels as we have in the training data layer. This comes in handy while training on multiple datasets with more than one manifest file. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 153 + }, + "colab_type": "code", + "id": "dC9QOenNPoUs", + "outputId": "786aac99-57f6-4066-e9a3-4908dc6e3d7a" + }, + "outputs": [], + "source": [ + "train_dl_params = copy.deepcopy(spkr_params[\"AudioToSpeechLabelDataLayer\"])\n", + "train_dl_params.update(spkr_params[\"AudioToSpeechLabelDataLayer\"][\"train\"])\n", + "del train_dl_params[\"train\"]\n", + "del train_dl_params[\"eval\"]\n", + "\n", + "batch_size=64\n", + "data_layer_train = nemo_asr.AudioToSpeechLabelDataLayer(\n", + " manifest_filepath=data_dir+'/train/train.json',\n", + " labels=None,\n", + " batch_size=batch_size,\n", + " time_length=time_length,\n", + " **train_dl_params,\n", + " )\n", + "\n", + "eval_dl_params = copy.deepcopy(spkr_params[\"AudioToSpeechLabelDataLayer\"])\n", + "eval_dl_params.update(spkr_params[\"AudioToSpeechLabelDataLayer\"][\"eval\"])\n", + "del eval_dl_params[\"train\"]\n", + "del eval_dl_params[\"eval\"]\n", + "\n", + "data_layer_eval = nemo_asr.AudioToSpeechLabelDataLayer(\n", + " manifest_filepath=data_dir+'/train/dev.json\",\n", + " labels=data_layer_train.labels,\n", + " batch_size=batch_size,\n", + " time_length=time_length,\n", + " **eval_dl_params,\n", + ")\n", + "\n", + "data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor(\n", + " sample_rate=sample_rate, **spkr_params[\"AudioToMelSpectrogramPreprocessor\"],\n", + " )\n", + "encoder = nemo_asr.JasperEncoder(**spkr_params[\"JasperEncoder\"],)\n", + "\n", + "decoder = nemo_asr.JasperDecoderForSpkrClass(\n", + " feat_in=spkr_params[\"JasperEncoder\"][\"jasper\"][-1][\"filters\"],\n", + " num_classes=data_layer_train.num_classes,\n", + " pool_mode=spkr_params[\"JasperDecoderForSpkrClass\"]['pool_mode'],\n", + " emb_sizes=spkr_params[\"JasperDecoderForSpkrClass\"][\"emb_sizes\"].split(\",\"),\n", + " )\n", + "\n", + "xent_loss = nemo_asr.CrossEntropyLossNM(weight=None)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "9bAP70DqsXGY" + }, + "source": [ + "The next step is to assemble our training DAG by specifying 
the inputs to each neural module." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 224 + }, + "colab_type": "code", + "id": "1raBGmd5Vshl", + "outputId": "33a128f4-193f-4913-9c82-fd27610dfb9a" + }, + "outputs": [], + "source": [ + "audio_signal, audio_signal_len, label, label_len = data_layer_train()\n", + "processed_signal, processed_signal_len = data_preprocessor(input_signal=audio_signal, length=audio_signal_len)\n", + "encoded, encoded_len = encoder(audio_signal=processed_signal, length=processed_signal_len)\n", + "logits, _ = decoder(encoder_output=encoded)\n", + "loss = xent_loss(logits=logits, labels=label)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "uwnZT8ycsYMa" + }, + "source": [ + "We would like to be able to evaluate our model on the dev set, as well, so let's set up the evaluation DAG.\n", + "\n", + "Our evaluation DAG will reuse most of the parts of the training DAG with the exception of the data layer, since we are loading the evaluation data from a different file but evaluating on the same model. Note that if we were using data augmentation in training, we would also leave that out in the evaluation DAG." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 224 + }, + "colab_type": "code", + "id": "sPPyiNtLWDyf", + "outputId": "3c37a7dd-b85c-4d29-edfe-cfd0cd16abe4" + }, + "outputs": [], + "source": [ + "audio_signal_test, audio_len_test, label_test, _ = data_layer_eval()\n", + "processed_signal_test, processed_len_test = data_preprocessor(\n", + " input_signal=audio_signal_test, length=audio_len_test\n", + " )\n", + "encoded_test, encoded_len_test = encoder(audio_signal=processed_signal_test, length=processed_len_test)\n", + "logits_test, _ = decoder(encoder_output=encoded_test)\n", + "loss_test = xent_loss(logits=logits_test, labels=label_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "8m7dz1-usp1S" + }, + "source": [ + "# Creating CallBacks\n", + "\n", + "We would like to be able to monitor our model while it's training, so we use callbacks. In general, callbacks are functions that are called at specific intervals over the course of training or inference, such as at the start or end of every n iterations, epochs, etc. The callbacks we'll be using for this are the SimpleLossLoggerCallback, which reports the training loss (or another metric of your choosing, such as \\% accuracy for speaker recognition tasks), and the EvaluatorCallback, which regularly evaluates the model on the dev set. Both of these callbacks require you to pass in the tensors to be evaluated--these would be the final outputs of the training and eval DAGs above.\n", + "\n", + "Another useful callback is the CheckpointCallback, for saving checkpoints at set intervals. We create one here just to demonstrate how it works." 
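# Aside: the SimpleLossLoggerCallback in the next cell uses the helper
# monitor_classification_training_progress to report loss and top-1 accuracy. If only the raw
# loss is wanted, a minimal print_func can be passed instead, following the pattern used in the
# start_here examples of this repo. This is just a sketch of that alternative (it assumes the
# training DAG above has already defined `loss`), not a replacement for the cell below.
import nemo
from nemo.utils import logging

simple_loss_callback = nemo.core.SimpleLossLoggerCallback(
    tensors=[loss],
    print_func=lambda x: logging.info(f'Train Loss: {str(x[0].item())}'),
    step_freq=1000,
)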
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "LFlXnbRaWTVl" + }, + "outputs": [], + "source": [ + "from nemo.collections.asr.helpers import (\n", + " monitor_classification_training_progress,\n", + " process_classification_evaluation_batch,\n", + " process_classification_evaluation_epoch,\n", + ")\n", + "from nemo.utils.lr_policies import CosineAnnealing\n", + "\n", + "train_callback = nemo.core.SimpleLossLoggerCallback(\n", + " tensors=[loss, logits, label],\n", + " print_func=partial(monitor_classification_training_progress, eval_metric=[1]),\n", + " step_freq=1000,\n", + " get_tb_values=lambda x: [(\"train_loss\", x[0])],\n", + " tb_writer=neural_factory.tb_writer,\n", + " )\n", + "\n", + "callbacks = [train_callback]\n", + "\n", + "chpt_callback = nemo.core.CheckpointCallback(\n", + " folder=\"./myExps/checkpoints/\" + exp_name,\n", + " load_from_folder=\"./myExps/checkpoints/\" + exp_name,\n", + " step_freq=1000,\n", + " )\n", + "callbacks.append(chpt_callback)\n", + "\n", + "tagname = \"hi-mia_dev\"\n", + "eval_callback = nemo.core.EvaluatorCallback(\n", + " eval_tensors=[loss_test, logits_test, label_test],\n", + " user_iter_callback=partial(process_classification_evaluation_batch, top_k=1),\n", + " user_epochs_done_callback=partial(process_classification_evaluation_epoch, tag=tagname),\n", + " eval_step=1000, # How often we evaluate the model on the test set\n", + " tb_writer=neural_factory.tb_writer,\n", + " )\n", + "\n", + "callbacks.append(eval_callback)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "a8EFjLsWs_jM" + }, + "source": [ + "Now that we have our model and callbacks set up, how do we run it?\n", + "\n", + "Once we create our neural factory and the callbacks for the information that we want to see, we can start training by simply calling the train function on the tensors we want to optimize and our callbacks! Since this notebook is for you to get started and since the an4 as dataset is small, it would quickly get higher accuracies. For better models use bigger datasets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "colab_type": "code", + "id": "xHTEtz7yXVMK", + "outputId": "bd53ae06-cd0d-4291-da66-1af3079cbd86" + }, + "outputs": [], + "source": [ + "# train model\n", + "num_epochs=25\n", + "N = len(data_layer_train)\n", + "steps_per_epoch = N // batch_size\n", + "\n", + "logging.info(\"Number of steps per epoch {}\".format(steps_per_epoch))\n", + "\n", + "neural_factory.train(\n", + " tensors_to_optimize=[loss],\n", + " callbacks=callbacks,\n", + " lr_policy=CosineAnnealing(\n", + " num_epochs * steps_per_epoch, warmup_steps=0.1 * num_epochs * steps_per_epoch,\n", + " ),\n", + " optimizer=\"novograd\",\n", + " optimization_params={\n", + " \"num_epochs\": num_epochs,\n", + " \"lr\": 0.02,\n", + " \"betas\": (0.95, 0.5),\n", + " \"weight_decay\": 0.001,\n", + " \"grad_norm_clip\": None,\n", + " }\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "BB6s19pmxGfX" + }, + "source": [ + "Now that we trained our embeddings, we shall extract these embeddings using our pretrained checkpoint present at `checkpoint_dir`. As we can see from the neural architecture, we extract the embeddings after the `emb1` layer. 
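# Aside: the CheckpointCallback above writes .pt files (one per module, e.g.
# JasperEncoder-STEP-<step>.pt) into ./myExps/checkpoints/<exp_name>. The call to
# neural_factory.infer below loads them via its checkpoint_dir argument, but individual modules
# can also be restored explicitly. A small sketch, assuming training has produced at least one
# checkpoint:
import glob
import os

checkpoint_dir = "./myExps/checkpoints/" + exp_name
encoder_ckpts = sorted(
    glob.glob(os.path.join(checkpoint_dir, "JasperEncoder-STEP-*.pt")), key=os.path.getmtime
)
print(encoder_ckpts)

if encoder_ckpts:
    encoder.restore_from(encoder_ckpts[-1])  # restore the most recent encoder weights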
\n", + "![Speaker Recognition Layers](./speaker_reco.jpg)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "oSIDu6jkym66" + }, + "source": [ + "Now use the test manifest to get the embeddings. As we saw before, let's create a new `data_layer` for test. Use previously instiated models and attach the DAGs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 258 + }, + "colab_type": "code", + "id": "5JqUVbKDY32a", + "outputId": "dd835e02-8882-4287-9639-c249ac3dfc94" + }, + "outputs": [], + "source": [ + "eval_dl_params = copy.deepcopy(spkr_params[\"AudioToSpeechLabelDataLayer\"])\n", + "eval_dl_params.update(spkr_params[\"AudioToSpeechLabelDataLayer\"][\"eval\"])\n", + "del eval_dl_params[\"train\"]\n", + "del eval_dl_params[\"eval\"]\n", + "eval_dl_params['shuffle'] = False # To grab the file names without changing data_layer\n", + "\n", + "test_dataset = data_dir+'/test/test_all.json',\n", + "data_layer_test = nemo_asr.AudioToSpeechLabelDataLayer(\n", + " manifest_filepath=test_dataset,\n", + " labels=None,\n", + " batch_size=batch_size,\n", + " **eval_dl_params,\n", + " )\n", + "\n", + "audio_signal_test, audio_len_test, label_test, _ = data_layer_test()\n", + "processed_signal_test, processed_len_test = data_preprocessor(\n", + " input_signal=audio_signal_test, length=audio_len_test)\n", + "encoded_test, _ = encoder(audio_signal=processed_signal_test, length=processed_len_test)\n", + "_, embeddings = decoder(encoder_output=encoded_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "dwEifkD9zfpl" + }, + "source": [ + "Now get the embeddings using `neural_factory.infer` command. It does a forward pass of all our modules and save our embeddings in `/embeddings`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 153 + }, + "colab_type": "code", + "id": "wGxYiFpJze5h", + "outputId": "dbbc7204-28bc-43e9-b6aa-f3f757f5d4b5" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import json\n", + "eval_tensors = neural_factory.infer(tensors=[embeddings, label_test], checkpoint_dir=\"./myExps/checkpoints/\" + exp_name)\n", + "\n", + "inf_emb, inf_label = eval_tensors\n", + "whole_embs = []\n", + "whole_labels = []\n", + "manifest = open(test_dataset, 'r').readlines()\n", + "\n", + "for line in manifest:\n", + " line = line.strip()\n", + " dic = json.loads(line)\n", + " filename = dic['audio_filepath'].split('/')[-1]\n", + " whole_labels.append(filename)\n", + "\n", + "for idx in range(len(inf_label)):\n", + " whole_embs.extend(inf_emb[idx].numpy())\n", + "\n", + "embedding_dir = './myExps/embeddings/'\n", + "if not os.path.exists(embedding_dir):\n", + " os.mkdir(embedding_dir)\n", + "\n", + "filename = os.path.basename(test_dataset).split('.')[0]\n", + "name = embedding_dir + filename\n", + "\n", + "np.save(name + '.npy', np.asarray(whole_embs))\n", + "np.save(name + '_labels.npy', np.asarray(whole_labels))\n", + "logging.info(\"Saved embedding files to {}\".format(embedding_dir))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "id": "SKKVIb7e6vel", + "outputId": "a3fa3703-da6c-4a07-c20c-c83df11a8f25" + }, + "outputs": [], + "source": [ + "!ls $embedding_dir" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "A_7S4Yja7A8V" + }, + "source": [ + "# Cosine Similarity Scoring\n", + "\n", + "Here we provide a script scoring on hi-mia. Its trial file has the structure: ` `. First copy the `trails_1m` file present in test folder to our embeddings directory" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!cp $data_dir/test/trails_1m $embedding_dir/" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Use the below command to output the EER% based on the cosine similarity score" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!python examples/speaker_recognition/hi-mia_eval.py --data_root $embedding_dir --emb $embedding_dir/test_all.npy --emb_labels $embedding_dir/test_all_labels.npy --emb_size 1024\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# PLDA Backend\n", + "To finetune our speaker embeddings further, we used kaldi PLDA scripts to train PLDA and evaluate. From this point going forward, please make sure you installed kaldi and added KALDI_ROOT to your path.\n", + "\n", + "To train PLDA, we can either use the dev set or training set. Let's use the training set embeddings to train PLDA and further use this trained PLDA model to score the test embeddings. In order to do that, we should get embeddings for our training data as well. 
Similar to the above steps, generate the train embeddings" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "test_dataset = data_dir+'/train/train.json',\n", + "\n", + "data_layer_test = nemo_asr.AudioToSpeechLabelDataLayer(\n", + " manifest_filepath=test_dataset,\n", + " labels=None,\n", + " batch_size=batch_size,\n", + " **eval_dl_params,\n", + " )\n", + "\n", + "audio_signal_test, audio_len_test, label_test, _ = data_layer_test()\n", + "processed_signal_test, processed_len_test = data_preprocessor(\n", + " input_signal=audio_signal_test, length=audio_len_test)\n", + "encoded_test, _ = encoder(audio_signal=processed_signal_test, length=processed_len_test)\n", + "_, embeddings = decoder(encoder_output=encoded_test)\n", + "\n", + "eval_tensors = neural_factory.infer(tensors=[embeddings, label_test], checkpoint_dir=\"./myExps/checkpoints/\" + exp_name)\n", + "\n", + "inf_emb, inf_label = eval_tensors\n", + "whole_embs = []\n", + "whole_labels = []\n", + "manifest = open(test_dataset, 'r').readlines()\n", + "\n", + "for line in manifest:\n", + " line = line.strip()\n", + " dic = json.loads(line)\n", + " filename = dic['audio_filepath'].split('/')[-1]\n", + " whole_labels.append(filename)\n", + "\n", + "for idx in range(len(inf_label)):\n", + " whole_embs.extend(inf_emb[idx].numpy())\n", + "\n", + "if not os.path.exists(embedding_dir):\n", + " os.mkdir(embedding_dir)\n", + "\n", + "filename = os.path.basename(test_dataset).split('.')[0]\n", + "name = embedding_dir + filename\n", + "\n", + "np.save(name + '.npy', np.asarray(whole_embs))\n", + "np.save(name + '_labels.npy', np.asarray(whole_labels))\n", + "logging.info(\"Saved embedding files to {}\".format(embedding_dir))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As part of kaldi, we need `utt2spk` \\& `spk2utt` files to get the ark file for PLDA training. To do that, copy the generated utt2spk file from `data_dir` train folder to create the spk2utt file using \n", + "\n", + "`utt2spk_to_spk2utt.pl $data_dir/train/utt2spk > $embedding_dir/spk2utt`\n", + "\n", + "Then run the below python script to get EER score using the PLDA backend scoring. This script does both data preparation for kaldi and PLDA scoring. 
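# Aside: utt2spk_to_spk2utt.pl simply groups utterances by speaker. If the Kaldi perl helper is
# not at hand, the same spk2utt file can be produced in Python. This mirrors the standard Kaldi
# formats (utt2spk: "<utt> <spk>" per line, spk2utt: "<spk> <utt1> <utt2> ..." per line); the
# paths below are the ones used earlier in this notebook.
from collections import OrderedDict

spk2utt = OrderedDict()
with open(data_dir + '/train/utt2spk') as f:
    for line in f:
        utt, spk = line.split()
        spk2utt.setdefault(spk, []).append(utt)

with open(embedding_dir + '/spk2utt', 'w') as f:
    for spk, utts in spk2utt.items():
        f.write(spk + ' ' + ' '.join(utts) + '\n')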
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!python examples/speaker_recognition/kaldi_plda.py --root $embedding_dir --train_embs $embedding_dir/train.npy --train_labels $embedding_dir/train_labels.npy \n", + "--eval_embs $embedding_dir/all_embs_himia.npy --eval_labels $embedding_dir/all_ids_himia.npy --stage=1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here `--stage = 1` trains PLDA model but if you already have a trained PLDA then you can directly evaluate on it by using the `--stage=2` option.\n", + "\n", + "This should output an EER of 6.32% with minDCF: 0.455" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Performance Improvement\n", + "\n", + "To improve your embeddings performance:\n", + " \n", + "* Add more data and Train longer (100 epochs)\n", + "\n", + "* Try adding augmentation –see config file\n", + "\n", + "* Use a larger model\n", + "\n", + "* Train on several GPUs and use mixed precision (on NVIDIA Volta and Turing GPUs)\n", + "\n", + "* Start with pre-trained checkpoints" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "Speaker_Recognition_dataset.ipynb", + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/examples/speaker_recognition/notebooks/speaker_reco.jpg b/examples/speaker_recognition/notebooks/speaker_reco.jpg new file mode 100644 index 000000000000..71350e0c4c5c Binary files /dev/null and b/examples/speaker_recognition/notebooks/speaker_reco.jpg differ diff --git a/examples/speaker_recognition/speaker_reco.py b/examples/speaker_recognition/speaker_reco.py new file mode 100644 index 000000000000..be85e1863769 --- /dev/null +++ b/examples/speaker_recognition/speaker_reco.py @@ -0,0 +1,294 @@ +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import argparse +import copy +import os +from functools import partial + +from ruamel.yaml import YAML + +import nemo +import nemo.collections.asr as nemo_asr +import nemo.utils.argparse as nm_argparse +from nemo.collections.asr.helpers import ( + monitor_classification_training_progress, + process_classification_evaluation_batch, + process_classification_evaluation_epoch, +) +from nemo.utils import logging +from nemo.utils.lr_policies import CosineAnnealing + + +def parse_args(): + parser = argparse.ArgumentParser( + parents=[nm_argparse.NemoArgParser()], description="SpeakerRecognition", conflict_handler="resolve", + ) + parser.set_defaults( + checkpoint_dir=None, + optimizer="novograd", + batch_size=32, + eval_batch_size=64, + lr=0.01, + weight_decay=0.001, + amp_opt_level="O1", + create_tb_writer=True, + ) + + # Overwrite default args + parser.add_argument( + "--num_epochs", + type=int, + default=None, + required=True, + help="number of epochs to train. You should specify either num_epochs or max_steps", + ) + parser.add_argument( + "--model_config", type=str, required=True, help="model configuration file: model.yaml", + ) + + # Create new args + parser.add_argument("--exp_name", default="SpkrReco_GramMatrix", type=str) + parser.add_argument("--beta1", default=0.95, type=float) + parser.add_argument("--beta2", default=0.5, type=float) + parser.add_argument("--warmup_steps", default=1000, type=int) + parser.add_argument("--load_dir", default=None, type=str) + parser.add_argument("--synced_bn", action="store_true", help="Use synchronized batch norm") + parser.add_argument("--emb_size", default=256, type=int) + parser.add_argument("--synced_bn_groupsize", default=0, type=int) + parser.add_argument("--print_freq", default=256, type=int) + + args = parser.parse_args() + if args.max_steps is not None: + raise ValueError("QuartzNet uses num_epochs instead of max_steps") + + return args + + +def construct_name(name, lr, batch_size, num_epochs, wd, optimizer, emb_size): + return "{0}-lr_{1}-bs_{2}-e_{3}-wd_{4}-opt_{5}-embsize_{6}".format( + name, lr, batch_size, num_epochs, wd, optimizer, emb_size + ) + + +def create_all_dags(args, neural_factory): + """ + creates train and eval dags as well as their callbacks + returns train loss tensor and callbacks""" + + # parse the config files + yaml = YAML(typ="safe") + with open(args.model_config) as f: + spkr_params = yaml.load(f) + + sample_rate = spkr_params["sample_rate"] + time_length = spkr_params.get("time_length", 8) + logging.info("max time length considered is {} sec".format(time_length)) + + # Calculate num_workers for dataloader + total_cpus = os.cpu_count() + cpu_per_traindl = max(int(total_cpus / neural_factory.world_size), 1) // 2 + + # create data layer for training + train_dl_params = copy.deepcopy(spkr_params["AudioToSpeechLabelDataLayer"]) + train_dl_params.update(spkr_params["AudioToSpeechLabelDataLayer"]["train"]) + del train_dl_params["train"] + del train_dl_params["eval"] + audio_augmentor = spkr_params.get("AudioAugmentor", None) + # del train_dl_params["normalize_transcripts"] + + data_layer_train = nemo_asr.AudioToSpeechLabelDataLayer( + manifest_filepath=args.train_dataset, + labels=None, + batch_size=args.batch_size, + num_workers=cpu_per_traindl, + augmentor=audio_augmentor, + time_length=time_length, + **train_dl_params, + # normalize_transcripts=False + ) + + N = len(data_layer_train) + steps_per_epoch = int(N / (args.batch_size * args.iter_per_step * args.num_gpus)) + + logging.info("Number of steps per epoch 
{}".format(steps_per_epoch)) + # create separate data layers for eval + # we need separate eval dags for separate eval datasets + # but all other modules in these dags will be shared + + eval_dl_params = copy.deepcopy(spkr_params["AudioToSpeechLabelDataLayer"]) + eval_dl_params.update(spkr_params["AudioToSpeechLabelDataLayer"]["eval"]) + del eval_dl_params["train"] + del eval_dl_params["eval"] + + data_layers_test = [] + for test_set in args.eval_datasets: + + data_layer_test = nemo_asr.AudioToSpeechLabelDataLayer( + manifest_filepath=test_set, + labels=data_layer_train.labels, + batch_size=args.batch_size, + num_workers=cpu_per_traindl, + time_length=time_length, + **eval_dl_params, + # normalize_transcripts=False + ) + data_layers_test.append(data_layer_test) + # create shared modules + + data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor( + sample_rate=sample_rate, **spkr_params["AudioToMelSpectrogramPreprocessor"], + ) + + spectr_augment_config = spkr_params.get("SpectrogramAugmentation", None) + if spectr_augment_config: + data_spectr_augmentation = nemo_asr.SpectrogramAugmentation(**spectr_augment_config) + # (QuartzNet uses the Jasper baseline encoder and decoder) + encoder = nemo_asr.JasperEncoder(**spkr_params["JasperEncoder"],) + + decoder = nemo_asr.JasperDecoderForSpkrClass( + feat_in=spkr_params["JasperEncoder"]["jasper"][-1]["filters"], + num_classes=data_layer_train.num_classes, + pool_mode=spkr_params["JasperDecoderForSpkrClass"]['pool_mode'], + emb_sizes=spkr_params["JasperDecoderForSpkrClass"]["emb_sizes"].split(","), + ) + if os.path.exists(args.checkpoint_dir + "/JasperEncoder-STEP-100.pt"): + encoder.restore_from(args.checkpoint_dir + "/JasperEncoder-STEP-100.pt") + logging.info("Pretrained Encoder loaded") + + weight = None + xent_loss = nemo_asr.CrossEntropyLossNM(weight=weight) + + # assemble train DAG + + audio_signal, audio_signal_len, label, label_len = data_layer_train() + + processed_signal, processed_signal_len = data_preprocessor(input_signal=audio_signal, length=audio_signal_len) + + if spectr_augment_config: + processed_signal = data_spectr_augmentation(input_spec=processed_signal) + + encoded, encoded_len = encoder(audio_signal=processed_signal, length=processed_signal_len) + + logits, _ = decoder(encoder_output=encoded) + loss = xent_loss(logits=logits, labels=label) + + # create train callbacks + train_callback = nemo.core.SimpleLossLoggerCallback( + tensors=[loss, logits, label], + print_func=partial(monitor_classification_training_progress, eval_metric=[1]), + step_freq=args.print_freq, + get_tb_values=lambda x: [("train_loss", x[0])], + tb_writer=neural_factory.tb_writer, + ) + + callbacks = [train_callback] + + if args.checkpoint_dir or args.load_dir: + chpt_callback = nemo.core.CheckpointCallback( + folder=args.checkpoint_dir, + load_from_folder=args.checkpoint_dir, # load dir + step_freq=args.checkpoint_save_freq, + checkpoints_to_keep=125, + ) + + callbacks.append(chpt_callback) + + # --- Assemble Validation DAG --- # + + for i, eval_layer in enumerate(data_layers_test): + + audio_signal_test, audio_len_test, label_test, _ = eval_layer() + processed_signal_test, processed_len_test = data_preprocessor( + input_signal=audio_signal_test, length=audio_len_test + ) + encoded_test, encoded_len_test = encoder(audio_signal=processed_signal_test, length=processed_len_test) + logits_test, _ = decoder(encoder_output=encoded_test) + loss_test = xent_loss(logits=logits_test, labels=label_test) + + tagname = 
os.path.dirname(args.eval_datasets[i]).split("/")[-1] + "_" + str(i) + print(tagname) + eval_callback = nemo.core.EvaluatorCallback( + eval_tensors=[loss_test, logits_test, label_test], + user_iter_callback=partial(process_classification_evaluation_batch, top_k=1), + user_epochs_done_callback=partial(process_classification_evaluation_epoch, tag=tagname), + eval_step=args.eval_freq, # How often we evaluate the model on the test set + tb_writer=neural_factory.tb_writer, + ) + + callbacks.append(eval_callback) + + return loss, callbacks, steps_per_epoch, loss_test, logits_test, label_test + + +def main(): + args = parse_args() + + print(args) + emb_size = 1024 + name = construct_name( + args.exp_name, args.lr, args.batch_size, args.num_epochs, args.weight_decay, args.optimizer, emb_size=emb_size, + ) + work_dir = name + if args.work_dir: + work_dir = os.path.join(args.work_dir, name) + + # instantiate Neural Factory with supported backend + neural_factory = nemo.core.NeuralModuleFactory( + backend=nemo.core.Backend.PyTorch, + local_rank=args.local_rank, + optimization_level=args.amp_opt_level, + log_dir=work_dir, + checkpoint_dir=args.checkpoint_dir + "/" + args.exp_name, + create_tb_writer=args.create_tb_writer, + files_to_copy=[args.model_config, __file__], + random_seed=42, + cudnn_benchmark=args.cudnn_benchmark, + tensorboard_dir=args.tensorboard_dir + "/" + name, + ) + args.num_gpus = neural_factory.world_size + + args.checkpoint_dir = neural_factory.checkpoint_dir + + if args.local_rank is not None: + logging.info("Doing ALL GPU") + + # build dags + (train_loss, callbacks, steps_per_epoch, loss_test, logits_test, label_test,) = create_all_dags( + args, neural_factory + ) + + # train model + neural_factory.train( + tensors_to_optimize=[train_loss], + callbacks=callbacks, + lr_policy=CosineAnnealing( + args.num_epochs * steps_per_epoch, warmup_steps=0.1 * args.num_epochs * steps_per_epoch, + ), + optimizer=args.optimizer, + optimization_params={ + "num_epochs": args.num_epochs, + "lr": args.lr, + "betas": (args.beta1, args.beta2), + "weight_decay": args.weight_decay, + "grad_norm_clip": None, + }, + batches_per_step=args.iter_per_step, + synced_batchnorm=args.synced_bn, + synced_batchnorm_groupsize=args.synced_bn_groupsize, + ) + + +if __name__ == "__main__": + main() diff --git a/examples/speaker_recognition/spkr_get_emb.py b/examples/speaker_recognition/spkr_get_emb.py new file mode 100644 index 000000000000..7fe5d9848bc0 --- /dev/null +++ b/examples/speaker_recognition/spkr_get_emb.py @@ -0,0 +1,207 @@ +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
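# Example invocation (illustrative only; paths are placeholders and the non-custom flags are
# assumed to come from the shared NemoArgParser). Unlike speaker_reco.py, this script only
# builds the evaluation DAG, runs inference from the given checkpoint directory, and dumps the
# embeddings and their labels as .npy files (see the end of main() below).
#
#   python spkr_get_emb.py \
#       --model_config=configs/quartznet_spkr_3x2x512_xvector.yaml \
#       --eval_datasets <path to test_all.json> \
#       --num_epochs=1 \
#       --work_dir=./myExps \
#       --checkpoint_dir=./myExps/checkpoints \
#       --exp_name=quartznet3x2_hi-mia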
+ +import argparse +import copy +import json +import os + +import numpy as np +from ruamel.yaml import YAML + +import nemo +import nemo.collections.asr as nemo_asr +import nemo.utils.argparse as nm_argparse +from nemo.utils import logging + + +def parse_args(): + parser = argparse.ArgumentParser( + parents=[nm_argparse.NemoArgParser()], description='SpeakerRecognition', conflict_handler='resolve', + ) + parser.set_defaults( + checkpoint_dir=None, + optimizer="novograd", + batch_size=32, + eval_batch_size=64, + lr=0.01, + weight_decay=0.001, + amp_opt_level="O0", + create_tb_writer=True, + ) + + # Overwrite default args + parser.add_argument( + "--num_epochs", + type=int, + default=None, + required=True, + help="number of epochs to train. You should specify either num_epochs or max_steps", + ) + parser.add_argument( + "--model_config", type=str, required=True, help="model configuration file: model.yaml", + ) + + # Create new args + parser.add_argument("--exp_name", default="SpkrReco_GramMatrix", type=str) + parser.add_argument("--beta1", default=0.95, type=float) + parser.add_argument("--beta2", default=0.5, type=float) + parser.add_argument("--warmup_steps", default=1000, type=int) + parser.add_argument("--load_dir", default=None, type=str) + parser.add_argument("--synced_bn", action='store_true', help="Use synchronized batch norm") + parser.add_argument("--synced_bn_groupsize", default=0, type=int) + parser.add_argument("--emb_size", default=256, type=int) + parser.add_argument("--print_freq", default=256, type=int) + + args = parser.parse_args() + if args.max_steps is not None: + raise ValueError("QuartzNet uses num_epochs instead of max_steps") + + return args + + +def construct_name(name, lr, batch_size, num_epochs, wd, optimizer, emb_size): + return "{0}-lr_{1}-bs_{2}-e_{3}-wd_{4}-opt_{5}-embsize_{6}".format( + name, lr, batch_size, num_epochs, wd, optimizer, emb_size + ) + + +def create_all_dags(args, neural_factory): + ''' + creates train and eval dags as well as their callbacks + returns train loss tensor and callbacks''' + + # parse the config files + yaml = YAML(typ="safe") + with open(args.model_config) as f: + spkr_params = yaml.load(f) + + sample_rate = spkr_params['sample_rate'] + + # Calculate num_workers for dataloader + total_cpus = os.cpu_count() + cpu_per_traindl = max(int(total_cpus / neural_factory.world_size), 1) + + # create separate data layers for eval + # we need separate eval dags for separate eval datasets + # but all other modules in these dags will be shared + + eval_dl_params = copy.deepcopy(spkr_params["AudioToSpeechLabelDataLayer"]) + eval_dl_params.update(spkr_params["AudioToSpeechLabelDataLayer"]["eval"]) + del eval_dl_params["train"] + del eval_dl_params["eval"] + eval_dl_params['shuffle'] = False # To grab the file names without changing data_layer + + data_layer_test = nemo_asr.AudioToSpeechLabelDataLayer( + manifest_filepath=args.eval_datasets[0], + labels=None, + batch_size=args.batch_size, + num_workers=cpu_per_traindl, + **eval_dl_params, + # normalize_transcripts=False + ) + # create shared modules + + data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor( + sample_rate=sample_rate, **spkr_params["AudioToMelSpectrogramPreprocessor"], + ) + + # (QuartzNet uses the Jasper baseline encoder and decoder) + encoder = nemo_asr.JasperEncoder(**spkr_params["JasperEncoder"],) + + decoder = nemo_asr.JasperDecoderForSpkrClass( + feat_in=spkr_params['JasperEncoder']['jasper'][-1]['filters'], + num_classes=254, + 
emb_sizes=spkr_params['JasperDecoderForSpkrClass']['emb_sizes'].split(','), + pool_mode=spkr_params["JasperDecoderForSpkrClass"]['pool_mode'], + ) + + # --- Assemble Validation DAG --- # + audio_signal_test, audio_len_test, label_test, _ = data_layer_test() + + processed_signal_test, processed_len_test = data_preprocessor( + input_signal=audio_signal_test, length=audio_len_test + ) + + encoded_test, _ = encoder(audio_signal=processed_signal_test, length=processed_len_test) + + _, embeddings = decoder(encoder_output=encoded_test) + + return embeddings, label_test + + +def main(): + args = parse_args() + + print(args) + + name = construct_name( + args.exp_name, args.lr, args.batch_size, args.num_epochs, args.weight_decay, args.optimizer, args.emb_size + ) + work_dir = name + if args.work_dir: + work_dir = os.path.join(args.work_dir, name) + + # instantiate Neural Factory with supported backend + neural_factory = nemo.core.NeuralModuleFactory( + backend=nemo.core.Backend.PyTorch, + local_rank=args.local_rank, + optimization_level=args.amp_opt_level, + log_dir=work_dir, + checkpoint_dir=args.checkpoint_dir + "/" + args.exp_name, + create_tb_writer=False, + files_to_copy=[args.model_config, __file__], + random_seed=42, + cudnn_benchmark=args.cudnn_benchmark, + ) + args.num_gpus = neural_factory.world_size + + args.checkpoint_dir = neural_factory.checkpoint_dir + + if args.local_rank is not None: + logging.info('Doing ALL GPU') + + # build dags + embeddings, label_test = create_all_dags(args, neural_factory) + + eval_tensors = neural_factory.infer(tensors=[embeddings, label_test], checkpoint_dir=args.checkpoint_dir) + # inf_loss , inf_emb, inf_logits, inf_label = eval_tensors + inf_emb, inf_label = eval_tensors + whole_embs = [] + whole_labels = [] + manifest = open(args.eval_datasets[0], 'r').readlines() + + for line in manifest: + line = line.strip() + dic = json.loads(line) + filename = dic['audio_filepath'].split('/')[-1] + whole_labels.append(filename) + + for idx in range(len(inf_label)): + whole_embs.extend(inf_emb[idx].numpy()) + + embedding_dir = args.work_dir + './embeddings/' + if not os.path.exists(embedding_dir): + os.mkdir(embedding_dir) + + filename = os.path.basename(args.eval_datasets[0]).split('.')[0] + name = embedding_dir + filename + + np.save(name + '.npy', np.asarray(whole_embs)) + np.save(name + '_labels.npy', np.asarray(whole_labels)) + logging.info("Saved embedding files to {}".format(embedding_dir)) + + +if __name__ == '__main__': + main() diff --git a/examples/speaker_recognition/train_plda.sh b/examples/speaker_recognition/train_plda.sh new file mode 100755 index 000000000000..6466dfc81dd5 --- /dev/null +++ b/examples/speaker_recognition/train_plda.sh @@ -0,0 +1,58 @@ +#!/bin/bash +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +lda_dim=200 +DIR=$1 +stage=$2 +train_scp=$DIR/train.scp +dev_scp=$DIR/dev.scp + +trail_file=$DIR/trials_1m +cd $KALDI_ROOT/egs/voxceleb/v2 +. path.sh +. 
cmd.sh +cd - + +if [ $stage -le 1 ]; then + ivector-mean scp:$train_scp $DIR/mean.vec + + echo "Training LDA" + $train_cmd $DIR/log ivector-compute-lda --total-covariance-factor=0.0 --dim=$lda_dim \ + "ark:ivector-subtract-global-mean scp:$train_scp ark:- |" \ + ark:$DIR/utt2spk $DIR/transform.mat || exit 1; + + echo "TRAINING PLDA" + $train_cmd $DIR/log ivector-compute-plda ark:$DIR/spk2utt \ + "ark:ivector-subtract-global-mean scp:$train_scp ark:- | transform-vec $DIR/transform.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" $DIR/plda || exit 1; +fi + +if [ $stage -le 2 ]; then + echo "SCORING" + sed 's/{}/average/' $trail_file > $DIR/temp_trail + $train_cmd $DIR/log ivector-plda-scoring --normalize-length=true \ + "ivector-copy-plda --smoothing=0.0 $DIR/plda - |" \ + "ark:ivector-subtract-global-mean $DIR/mean.vec scp:$dev_scp ark:- | transform-vec $DIR/transform.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \ + "ark:ivector-subtract-global-mean $DIR/mean.vec scp:$dev_scp ark:- | transform-vec $DIR/transform.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \ + "cat '$DIR/temp_trail' | cut -d\ --fields=1,2 |" $DIR/scores || exit 1; + + paste -d' ' <(awk '{print $3}' $DIR/scores) <(awk '{print $3}' $trail_file) > $DIR/final_score + + eer=`compute-eer <($KALDI_ROOT/egs/voxceleb/v2/local/prepare_for_eer.py $DIR/temp_trail $DIR/scores) 2> /dev/null` + # eer=`compute-eer $DIR/final_score 2> /dev/null` + mindcf=`$KALDI_ROOT/egs/voxceleb/v2/sid/compute_min_dcf.py $DIR/scores $DIR/temp_trail 2> /dev/null` + echo "EER: $eer%" + echo "minDCF: $mindcf" + +fi diff --git a/examples/start_here/README.md b/examples/start_here/README.md index bc218e494179..3a6e8429ad1c 100644 --- a/examples/start_here/README.md +++ b/examples/start_here/README.md @@ -21,18 +21,17 @@ previous example (and in a typical seq2seq model). ```python ... # Instance one on EncoderRNN -encoder1 = neural_factory.get_module( - name="EncoderRNN", collection="tutorials", - params=config) +encoder1 = nemo.tutorials.EncoderRNN(voc_size=6107, encoder_n_layers=2, hidden_size=512, dropout=0.1) + # Instance two on EncoderRNN. It will have different weights from instance one -encoder2 = neural_factory.get_module( - name="EncoderRNN", collection="tutorials", - params=config) -mixer = neural_factory.get_module( - name="SimpleCombiner", collection="other", - params={} -) +encoder2 = nemo.tutorials.EncoderRNN(voc_size=6107, encoder_n_layers=2, hidden_size=512, dropout=0.1) + +# Create a simple combiner mixing the encodings. +mixer = nemo.backends.pytorch.common.SimpleCombiner() + ... + +# Create the graph by connecting input and output ports of the created modules. encoder_outputs1, encoder_hidden1 = encoder1(input_seq=src, input_lengths=src_lengths) encoder_outputs2, encoder_hidden2 = encoder2(input_seq=src, diff --git a/examples/start_here/chatbot_example.py b/examples/start_here/chatbot_example.py index ca2950c22bce..e0e974ab446c 100644 --- a/examples/start_here/chatbot_example.py +++ b/examples/start_here/chatbot_example.py @@ -3,8 +3,7 @@ import shutil import nemo - -logging = nemo.logging +from nemo.utils import logging data_file = "movie_data.txt" @@ -65,12 +64,14 @@ def outputs2words(tensors, vocab): tensors=[loss, src, outputs_inf, tgt], print_func=lambda x: outputs2words(x, dl.voc.index2word), ) -num_epochs = 1 -logging.info(f"Training only for {num_epochs}. Train longer (~10-20) for convergence.") +# num_epochs = 10 +max_steps = 50 +# logging.info(f"Training only for {num_epochs} epochs. 
10-20 epochs recommended for convergence.") +logging.info(f"Training only for {max_steps} steps. Train longer (~10-20) for convergence.") # Start training nf.train( tensors_to_optimize=[loss], callbacks=[callback], optimizer="adam", - optimization_params={"num_epochs": num_epochs, "lr": 0.001}, + optimization_params={"max_steps": max_steps, "lr": 0.001}, ) diff --git a/examples/start_here/chatbot_example2.py b/examples/start_here/chatbot_example2.py deleted file mode 100644 index d5553413341a..000000000000 --- a/examples/start_here/chatbot_example2.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright (c) 2019 NVIDIA Corporation -import gzip -import os -import shutil - -import nemo - -logging = nemo.logging - -# Get Data -data_file = "movie_data.txt" -if not os.path.isfile(data_file): - with gzip.open("../../tests/data/movie_lines.txt.gz", 'rb') as f_in: - with open(data_file, 'wb') as f_out: - shutil.copyfileobj(f_in, f_out) - -# Configuration -config = { - "corpus_name": "cornell", - "datafile": data_file, - "attn_model": 'dot', - "hidden_size": 512, - "encoder_n_layers": 2, - "decoder_n_layers": 2, - "dropout": 0.1, - "voc_size": 6104 + 3, - "batch_size": 128, - "num_epochs": 15, - "optimizer_kind": "adam", - "learning_rate": 0.0003, - "tb_log_dir": "ChatBot", -} - -# instantiate Neural Factory with supported backend -neural_factory = nemo.core.NeuralModuleFactory(backend=nemo.core.Backend.PyTorch, local_rank=None) - -# instantiate necessary neural modules -dl = neural_factory.get_module(name="DialogDataLayer", collection="tutorials", params=config) - -# Instance one on EncoderRNN -encoder1 = neural_factory.get_module(name="EncoderRNN", collection="tutorials", params=config) -# Instance two on EncoderRNN. It will have different weights from instance one -encoder2 = neural_factory.get_module(name="EncoderRNN", collection="tutorials", params=config) -mixer = neural_factory.get_module(name="SimpleCombiner", collection="common", params={}) - -decoder = neural_factory.get_module(name="LuongAttnDecoderRNN", collection="tutorials", params=config) - -L = neural_factory.get_module(name="MaskedXEntropyLoss", collection="tutorials", params={}) - -decoderInfer = neural_factory.get_module(name="GreedyLuongAttnDecoderRNN", collection="tutorials", params=config) -# notice trainng and inference decoder share parameters -decoderInfer.tie_weights_with(decoder, list(decoder.get_weights().keys())) - -# express activations flow -src, src_lengths, tgt, mask, max_tgt_length = dl() -encoder_outputs1, encoder_hidden1 = encoder1(input_seq=src, input_lengths=src_lengths) -encoder_outputs2, encoder_hidden2 = encoder2(input_seq=src, input_lengths=src_lengths) -encoder_outputs = mixer(x1=encoder_outputs1, x2=encoder_outputs2) -outputs, hidden = decoder(targets=tgt, encoder_outputs=encoder_outputs, max_target_len=max_tgt_length) -loss = L(predictions=outputs, target=tgt, mask=mask) - -# run inference decoder to generate predictions -outputs_inf, _ = decoderInfer(encoder_outputs=encoder_outputs) - - -# this function is necessary to print intermediate results to console - - -def outputs2words(tensors, vocab): - source_ids = tensors[1][:, 0].cpu().numpy().tolist() - response_ids = tensors[2][:, 0].cpu().numpy().tolist() - tgt_ids = tensors[3][:, 0].cpu().numpy().tolist() - source = list(map(lambda x: vocab[x], source_ids)) - response = list(map(lambda x: vocab[x], response_ids)) - target = list(map(lambda x: vocab[x], tgt_ids)) - source = ' '.join([s for s in source if s != 'EOS' and s != 'PAD']) - response = ' '.join([s for s 
in response if s != 'EOS' and s != 'PAD']) - target = ' '.join([s for s in target if s != 'EOS' and s != 'PAD']) - logging.info(f'Train Loss: {str(tensors[0].item())}') - tmp = " SOURCE: {0} <---> PREDICTED RESPONSE: {1} <---> TARGET: {2}" - return tmp.format(source, response, target) - - -# Create trainer and execute training action -callback = nemo.core.SimpleLossLoggerCallback( - tensors=[loss, src, outputs_inf, tgt], print_func=lambda x: outputs2words(x, dl.voc.index2word), -) -# Instantiate an optimizer to perform `train` action -optimizer = neural_factory.get_trainer() - -optimizer.train( - tensors_to_optimize=[loss], - callbacks=[callback], - optimizer="adam", - optimization_params={"num_epochs": config["num_epochs"], "lr": 0.001}, -) diff --git a/examples/start_here/module_configuration.py b/examples/start_here/module_configuration.py new file mode 100644 index 000000000000..42d53ec237ca --- /dev/null +++ b/examples/start_here/module_configuration.py @@ -0,0 +1,54 @@ +# ! /usr/bin/python +# -*- coding: utf-8 -*- + +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +from nemo.backends.pytorch.tutorials import MSELoss, RealFunctionDataLayer, TaylorNet +from nemo.core import DeviceType, NeuralModule, NeuralModuleFactory, SimpleLossLoggerCallback +from nemo.utils import logging + +# Run on CPU. +nf = NeuralModuleFactory(placement=DeviceType.CPU) + +# Instantitate RealFunctionDataLayer defaults to f=torch.sin, sampling from x=[-1, 1] +dl = RealFunctionDataLayer(n=100, f_name="cos", x_lo=-1, x_hi=1, batch_size=128) + +# Instantiate a simple feed-forward, single layer neural network. +fx = TaylorNet(dim=4, name="fx") + +# Instantitate loss. +mse_loss = MSELoss() + +# Export the model configuration. +fx.export_to_config("/tmp/taylor_net.yml") + +# Create a second instance, using the parameters loaded from the previously created configuration. +fx2 = NeuralModule.import_from_config("/tmp/taylor_net.yml") + +# Create a graph by connecting the outputs with inputs of modules. +x, y = dl() +# Please note that in the graph we are using the "second" instance. +p = fx2(x=x) +loss = mse_loss(predictions=p, target=y) + +# SimpleLossLoggerCallback will print loss values to console. +callback = SimpleLossLoggerCallback( + tensors=[loss], print_func=lambda x: logging.info(f'Train Loss: {str(x[0].item())}') +) + +# Invoke the "train" action. +nf.train([loss], callbacks=[callback], optimization_params={"num_epochs": 3, "lr": 0.0003}, optimizer="sgd") diff --git a/examples/start_here/module_custom_configuration.py b/examples/start_here/module_custom_configuration.py new file mode 100644 index 000000000000..4f406304de23 --- /dev/null +++ b/examples/start_here/module_custom_configuration.py @@ -0,0 +1,120 @@ +# ! 
/usr/bin/python +# -*- coding: utf-8 -*- + +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +from enum import Enum + +from nemo.backends.pytorch.tutorials import MSELoss, RealFunctionDataLayer, TaylorNet +from nemo.core import DeviceType, NeuralModuleFactory, SimpleLossLoggerCallback +from nemo.utils import logging + + +# A custom enum. +class Status(Enum): + success = 0 + error = 1 + + +class CustomTaylorNet(TaylorNet): + """Module which learns Taylor's coefficients. Extends the original module by a custom status enum.""" + + def __init__(self, dim, status: Status): + super().__init__(dim) + logging.info("Status: {}".format(status)) + + def _serialize_configuration(self): + """ + A custom method serializing the configuration to a YAML file. + + Returns: + a "serialized" dictionary with module configuration. + """ + + # Create the dictionary to be exported. + init_to_export = {} + + # "Serialize" dim. + init_to_export["dim"] = self._init_params["dim"] + + # Custom "serialization" of the status. + if self._init_params["status"] == Status.success: + init_to_export["status"] = 0 + else: + init_to_export["status"] = 1 + + # Return serialized parameters. + return init_to_export + + @classmethod + def _deserialize_configuration(cls, init_params): + """ + A function that deserializes the module "configuration (i.e. init parameters). + + Args: + init_params: List of init parameters loaded from the YAML file. + + Returns: + A "deserialized" list with init parameters. + """ + deserialized_params = {} + + # "Deserialize" dim. + deserialized_params["dim"] = init_params["dim"] + + # Custom "deserialization" of the status. + if init_params["status"] == 0: + deserialized_params["status"] = Status.success + else: + deserialized_params["status"] = Status.error + + # Return deserialized parameters. + return deserialized_params + + +# Run on CPU. +nf = NeuralModuleFactory(placement=DeviceType.CPU) + +# Instantitate RealFunctionDataLayer defaults to f=torch.sin, sampling from x=[-1, 1] +dl = RealFunctionDataLayer(n=100, f_name="cos", x_lo=-1, x_hi=1, batch_size=32) + +# Instantiate a simple feed-forward, single layer neural network. +fx = CustomTaylorNet(dim=4, status=Status.error) + +# Instantitate loss. +mse_loss = MSELoss() + +# Export the model configuration. +fx.export_to_config("/tmp/custom_taylor_net.yml") + +# Create a second instance, using the parameters loaded from the previously created configuration. +# Please note that we are calling the overriden method from the CustomTaylorNet class. +fx2 = CustomTaylorNet.import_from_config("/tmp/custom_taylor_net.yml") + +# Create a graph by connecting the outputs with inputs of modules. +x, y = dl() +# Please note that in the graph we are using the "second" instance. 
+p = fx2(x=x) +loss = mse_loss(predictions=p, target=y) + +# SimpleLossLoggerCallback will print loss values to console. +callback = SimpleLossLoggerCallback( + tensors=[loss], print_func=lambda x: logging.info(f'Train Loss: {str(x[0].item())}') +) + +# Invoke the "train" action. +nf.train([loss], callbacks=[callback], optimization_params={"num_epochs": 3, "lr": 0.0003}, optimizer="sgd") diff --git a/examples/start_here/simplest_example.py b/examples/start_here/simplest_example.py index 0bf3fb795dac..1e4bd2de633f 100644 --- a/examples/start_here/simplest_example.py +++ b/examples/start_here/simplest_example.py @@ -1,7 +1,6 @@ # Copyright (c) 2019 NVIDIA Corporation import nemo - -logging = nemo.logging +from nemo.utils import logging nf = nemo.core.NeuralModuleFactory() # To use CPU-only do: diff --git a/examples/start_here/simplest_example_configuration_import.py b/examples/start_here/simplest_example_configuration_import.py deleted file mode 100644 index 310c2bf25b54..000000000000 --- a/examples/start_here/simplest_example_configuration_import.py +++ /dev/null @@ -1,48 +0,0 @@ -# TODO: actually fill this -# ! /usr/bin/python -# -*- coding: utf-8 -*- - -# Copyright 2020 NVIDIA. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================= - -import nemo -from nemo.core import DeviceType - -# Run on CPU. -nf = nemo.core.NeuralModuleFactory(placement=DeviceType.CPU) - - -# instantiate necessary neural modules -# RealFunctionDataLayer defaults to f=torch.sin, sampling from x=[-4, 4] -# dl = nemo.tutorials.RealFunctionDataLayer(n=10000, f_name="cos", x=[-4, 4], batch_size=128) -dl = nemo.tutorials.RealFunctionDataLayer(n=100, f_name="cos", x_lo=-1, x_hi=1, batch_size=128) - - -fx = nemo.tutorials.TaylorNet(dim=4) -loss = nemo.tutorials.MSELoss() - -# describe activation's flow -x, y = dl() -p = fx(x=x) -lss = loss(predictions=p, target=y) - -# SimpleLossLoggerCallback will print loss values to console. -callback = nemo.core.SimpleLossLoggerCallback( - tensors=[lss], print_func=lambda x: nemo.logging.info(f'Train Loss: {str(x[0].item())}') -) - - -# Invoke "train" action -nf.train([lss], callbacks=[callback], optimization_params={"num_epochs": 3, "lr": 0.0003}, optimizer="sgd") diff --git a/examples/tts/configs/fastspeech.yaml b/examples/tts/configs/fastspeech.yaml new file mode 100644 index 000000000000..d6540d32663f --- /dev/null +++ b/examples/tts/configs/fastspeech.yaml @@ -0,0 +1,76 @@ +model: "FastSpeech" +sample_rate: &sr 22050 +# , , will be added by the fastspeech.py script. 
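# Note: the scalar values in this file are declared once as YAML anchors (&labels, &sr, &n_fft,
# &n_mels, &fmax, &n_window_stride, &pad_value) and reused by the module sections further down
# via the matching aliases (*labels, *sr, ...). Runtime-only values such as the manifest path,
# durations directory, batch size and the bos/eos/pad ids are deliberately omitted here;
# fastspeech.py supplies them through the overwrite_params argument of import_from_config().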
+labels: &labels [' ', '!', '"', "'", '(', ')', ',', '-', '.', ':', ';', '?', 'A', 'B', + 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', + 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', ']', 'a', 'b', + 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', + 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'] +n_fft: &n_fft 1024 +n_mels: &n_mels 80 +fmax: &fmax 8000 +n_stride: &n_window_stride 256 +pad_value: &pad_value -11.52 + +FastSpeechDataLayer: + header: {collection_type: asr, collection_version: null, + full_spec: nemo.collections.tts.FastSpeechDataLayer, nemo_core_version: 0.9.5} + init_params: + drop_last: false + int_values: false + labels: *labels + load_audio: true + max_duration: null + min_duration: 0.1 + normalize_transcripts: false + sample_rate: *sr + trim_silence: false + shuffle: true + +AudioToMelSpectrogramPreprocessor: + header: {full_spec: nemo.collections.asr.audio_preprocessing.AudioToMelSpectrogramPreprocessor, + collection_type: asr, collection_version: null, nemo_core_version: 0.9.5} + init_params: + dither: 0.0 + features: *n_mels + frame_splicing: 1 + highfreq: *fmax + log: true + log_zero_guard_type: clamp + log_zero_guard_value: 1e-05 + lowfreq: 0 + mag_power: 1.0 + n_fft: *n_fft + n_window_size: 1024 + n_window_stride: *n_window_stride + normalize: null + pad_to: 16 + pad_value: *pad_value + preemph: null + sample_rate: *sr + stft_conv: true + window: hann + window_size: null + window_stride: null + +FastSpeech: + header: {full_spec: nemo.collections.tts.FastSpeech, + collection_type: asr, collection_version: null, nemo_core_version: 0.9.5} + init_params: + decoder_output_size: 384 + n_mels: *n_mels + max_seq_len: 2048 + word_vec_dim: 384 + encoder_n_layer: 6 + encoder_head: 2 + encoder_conv1d_filter_size: 1536 + decoder_n_layer: 6 + decoder_head: 2 + decoder_conv1d_filter_size: 1536 + fft_conv1d_kernel: 3 + fft_conv1d_padding: 1 + encoder_output_size: 384 + duration_predictor_filter_size: 256 + duration_predictor_kernel_size: 3 + dropout: 0.1 + alpha: 1.0 \ No newline at end of file diff --git a/examples/tts/configs/tacotron2.yaml b/examples/tts/configs/tacotron2.yaml index a405d4b85b90..bbb06ceb7c2a 100644 --- a/examples/tts/configs/tacotron2.yaml +++ b/examples/tts/configs/tacotron2.yaml @@ -1,7 +1,7 @@ model: "Tacotron 2" sample_rate: &sr 22050 # , , will be added by the tacotron2.py script -labels: [' ', '!', '"', "'", '(', ')', ',', '-', '.', ':', ';', '?', 'A', 'B', +labels: &labels [' ', '!', '"', "'", '(', ')', ',', '-', '.', ':', ';', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', ']', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', @@ -12,67 +12,127 @@ fmax: &fmax 8000 n_stride: &n_window_stride 256 pad_value: &pad_value -11.52 -AudioToTextDataLayer: - normalize_transcripts: false - sample_rate: *sr - train: +AudioToTextDataLayer_train: + header: {collection_type: asr, collection_version: null, + full_spec: nemo.collections.asr.data_layer.AudioToTextDataLayer, nemo_core_version: 0.9.5} + init_params: + drop_last: false + int_values: false + labels: *labels + load_audio: true + max_duration: null + min_duration: 0.1 + normalize_transcripts: false + sample_rate: *sr + trim_silence: false shuffle: true - eval: + # batch_size: 48 # Overwritten in tacotron2.py + # num_workers: 20 + # manifest_filepath: ljspeech_train.json + # bos_id: 66 + # eos_id: 67 + # pad_id: 68 + 
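# The former nested AudioToTextDataLayer section (with "train"/"eval" sub-keys) is now split
# into two self-contained sections, AudioToTextDataLayer_train above and
# AudioToTextDataLayer_eval below, so each can be loaded directly with import_from_config().
# A rough sketch of how tacotron2.py consumes the eval section (runtime values are injected
# via overwrite_params; variable names follow the script in this change set):
#
#   data_layer_eval = nemo_asr.AudioToTextDataLayer.import_from_config(
#       tacotron2_config_file,
#       "AudioToTextDataLayer_eval",
#       overwrite_params={
#           "manifest_filepath": eval_dataset,
#           "batch_size": eval_batch_size,
#           "num_workers": cpu_per_dl,
#           "bos_id": len(labels),
#           "eos_id": len(labels) + 1,
#           "pad_id": len(labels) + 2,
#       },
#   )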
+AudioToTextDataLayer_eval: + header: {collection_type: asr, collection_version: null, + full_spec: nemo.collections.asr.data_layer.AudioToTextDataLayer, nemo_core_version: 0.9.5} + init_params: + drop_last: false + int_values: false + labels: *labels + load_audio: true + max_duration: null + min_duration: 0.1 + normalize_transcripts: false + sample_rate: *sr + trim_silence: false shuffle: false + # batch_size: 48 # Overwritten in tacotron2.py + # num_workers: 20 + # manifest_filepath: ljspeech_eval.json + # bos_id: 66 + # eos_id: 67 + # pad_id: 68 AudioToMelSpectrogramPreprocessor: - normalize: null - window_size: null - window_stride: null - n_window_size: 1024 - n_window_stride: *n_window_stride - window: "hann" - features: *n_mels - n_fft: *n_fft - frame_splicing: 1 - dither: 0. - #feat_type: "logfbank" - stft_conv: true - sample_rate: *sr - highfreq: *fmax - preemph: null - pad_value: *pad_value - mag_power: 1. - log_zero_guard_type: "clamp" - log_zero_guard_value: 1.0e-5 + header: {full_spec: nemo.collections.asr.audio_preprocessing.AudioToMelSpectrogramPreprocessor, + collection_type: asr, collection_version: null, nemo_core_version: 0.9.5} + init_params: + dither: 0.0 + features: *n_mels + frame_splicing: 1 + highfreq: *fmax + log: true + log_zero_guard_type: clamp + log_zero_guard_value: 1e-05 + lowfreq: 0 + mag_power: 1.0 + n_fft: *n_fft + n_window_size: 1024 + n_window_stride: *n_window_stride + normalize: null + pad_to: 16 + pad_value: *pad_value + preemph: null + sample_rate: *sr + stft_conv: true + window: hann + window_size: null + window_stride: null Tacotron2Encoder: - encoder_kernel_size: 5 - encoder_n_convolutions: 3 - encoder_embedding_dim: &enc_emb_dim 512 + header: {full_spec: nemo.collections.tts.tacotron2_modules.Tacotron2Encoder, + collection_type: tts, collection_version: null, nemo_core_version: 0.9.5} + init_params: + encoder_kernel_size: 5 + encoder_n_convolutions: 3 + encoder_embedding_dim: &enc_emb_dim 512 TextEmbedding: - symbols_embedding_dim: *enc_emb_dim + header: {full_spec: nemo.collections.tts.tacotron2_modules.TextEmbedding, + collection_type: tts, collection_version: null, nemo_core_version: 0.9.5} + init_params: + symbols_embedding_dim: *enc_emb_dim + # n_symbols: 69 # Overwritten in tacotron2.py Tacotron2Decoder: - n_mel_channels: *n_mels - n_frames_per_step: 1 # currently only 1 is supported - encoder_embedding_dim: *enc_emb_dim - decoder_rnn_dim: 1024 - prenet_dim: 256 - max_decoder_steps: 1000 - gate_threshold: 0.5 - p_attention_dropout: 0.1 - p_decoder_dropout: 0.1 - - # Attention parameters - attention_rnn_dim: 1024 - attention_dim: 128 + header: {full_spec: nemo.collections.tts.tacotron2_modules.Tacotron2Decoder, + collection_type: tts, collection_version: null, nemo_core_version: 0.9.5} + init_params: &decoder_params + decoder_rnn_dim: 1024 + encoder_embedding_dim: *enc_emb_dim + gate_threshold: 0.5 + max_decoder_steps: 1000 + n_frames_per_step: 1 # currently only 1 is supported + n_mel_channels: *n_mels + p_attention_dropout: 0.1 + p_decoder_dropout: 0.1 + prenet_dim: 256 + prenet_p_dropout: 0.5 + # Attention parameters + attention_dim: 128 + attention_rnn_dim: 1024 + # AttentionLocation Layer parameters + attention_location_kernel_size: 31 + attention_location_n_filters: 32 - # Location Layer parameters - attention_location_n_filters: 32 - attention_location_kernel_size: 31 +Tacotron2DecoderInfer: + header: {full_spec: nemo.collections.tts.tacotron2_modules.Tacotron2DecoderInfer, + collection_type: tts, collection_version: null, 
nemo_core_version: 0.9.5} + init_params: *decoder_params Tacotron2Postnet: - n_mel_channels: *n_mels - postnet_embedding_dim: 512 - postnet_kernel_size: 5 - postnet_n_convolutions: 5 + header: {full_spec: nemo.collections.tts.tacotron2_modules.Tacotron2Postnet, + collection_type: tts, collection_version: null, nemo_core_version: 0.9.5} + init_params: + n_mel_channels: *n_mels + p_dropout: 0.5 + postnet_embedding_dim: 512 + postnet_kernel_size: 5 + postnet_n_convolutions: 5 Tacotron2Loss: - pad_value: *pad_value + header: {full_spec: nemo.collections.tts.tacotron2_modules.Tacotron2Loss, + collection_type: tts, collection_version: null, nemo_core_version: 0.9.5} + init_params: + pad_value: *pad_value diff --git a/examples/tts/configs/tacotron2_mandarin.yaml b/examples/tts/configs/tacotron2_mandarin.yaml index eb90b6761747..e9e5827c6401 100644 --- a/examples/tts/configs/tacotron2_mandarin.yaml +++ b/examples/tts/configs/tacotron2_mandarin.yaml @@ -1,7 +1,7 @@ model: "Tacotron 2 Mandarin" sample_rate: &sr 22050 # , , will be added by the tacotron2.py script -labels: [' ', '!', ',', '.', '?', 'a', 'b', 'c', +labels: &labels [' ', '!', ',', '.', '?', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', @@ -12,67 +12,127 @@ fmax: &fmax 8000 n_stride: &n_window_stride 256 pad_value: &pad_value -11.52 -AudioToTextDataLayer: - normalize_transcripts: false - sample_rate: *sr - train: +AudioToTextDataLayer_train: + header: {collection_type: asr, collection_version: null, + full_spec: nemo.collections.asr.data_layer.AudioToTextDataLayer, nemo_core_version: 0.9.5} + init_params: + drop_last: false + int_values: false + labels: *labels + load_audio: true + max_duration: null + min_duration: 0.1 + normalize_transcripts: false + sample_rate: *sr + trim_silence: false shuffle: true - eval: + # batch_size: 48 # Overwritten in tacotron2.py + # num_workers: 20 + # manifest_filepath: ljspeech_train.json + # bos_id: 66 + # eos_id: 67 + # pad_id: 68 + +AudioToTextDataLayer_eval: + header: {collection_type: asr, collection_version: null, + full_spec: nemo.collections.asr.data_layer.AudioToTextDataLayer, nemo_core_version: 0.9.5} + init_params: + drop_last: false + int_values: false + labels: *labels + load_audio: true + max_duration: null + min_duration: 0.1 + normalize_transcripts: false + sample_rate: *sr + trim_silence: false shuffle: false + # batch_size: 48 # Overwritten in tacotron2.py + # num_workers: 20 + # manifest_filepath: ljspeech_eval.json + # bos_id: 66 + # eos_id: 67 + # pad_id: 68 AudioToMelSpectrogramPreprocessor: - normalize: null - window_size: null - window_stride: null - n_window_size: 1024 - n_window_stride: *n_window_stride - window: "hann" - features: *n_mels - n_fft: *n_fft - frame_splicing: 1 - dither: 0. - feat_type: "logfbank" - stft_conv: true - sample_rate: *sr - highfreq: *fmax - preemph: null - pad_value: *pad_value - mag_power: 1. 
- log_zero_guard_type: "clamp" - log_zero_guard_value: 1.0e-5 + header: {full_spec: nemo.collections.asr.audio_preprocessing.AudioToMelSpectrogramPreprocessor, + collection_type: asr, collection_version: null, nemo_core_version: 0.9.5} + init_params: + dither: 0.0 + features: *n_mels + frame_splicing: 1 + highfreq: *fmax + log: true + log_zero_guard_type: clamp + log_zero_guard_value: 1e-05 + lowfreq: 0 + mag_power: 1.0 + n_fft: *n_fft + n_window_size: 1024 + n_window_stride: *n_window_stride + normalize: null + pad_to: 16 + pad_value: *pad_value + preemph: null + sample_rate: *sr + stft_conv: true + window: hann + window_size: null + window_stride: null Tacotron2Encoder: - encoder_kernel_size: 5 - encoder_n_convolutions: 3 - encoder_embedding_dim: &enc_emb_dim 512 + header: {full_spec: nemo.collections.tts.tacotron2_modules.Tacotron2Encoder, + collection_type: tts, collection_version: null, nemo_core_version: 0.9.5} + init_params: + encoder_kernel_size: 5 + encoder_n_convolutions: 3 + encoder_embedding_dim: &enc_emb_dim 512 TextEmbedding: - symbols_embedding_dim: *enc_emb_dim + header: {full_spec: nemo.collections.tts.tacotron2_modules.TextEmbedding, + collection_type: tts, collection_version: null, nemo_core_version: 0.9.5} + init_params: + symbols_embedding_dim: *enc_emb_dim + # n_symbols: 69 # Overwritten in tacotron2.py Tacotron2Decoder: - n_mel_channels: *n_mels - n_frames_per_step: 1 # currently only 1 is supported - encoder_embedding_dim: *enc_emb_dim - decoder_rnn_dim: 1024 - prenet_dim: 256 - max_decoder_steps: 1000 - gate_threshold: 0.5 - p_attention_dropout: 0.1 - p_decoder_dropout: 0.1 - - # Attention parameters - attention_rnn_dim: 1024 - attention_dim: 128 + header: {full_spec: nemo.collections.tts.tacotron2_modules.Tacotron2Decoder, + collection_type: tts, collection_version: null, nemo_core_version: 0.9.5} + init_params: &decoder_params + decoder_rnn_dim: 1024 + encoder_embedding_dim: *enc_emb_dim + gate_threshold: 0.5 + max_decoder_steps: 1000 + n_frames_per_step: 1 # currently only 1 is supported + n_mel_channels: *n_mels + p_attention_dropout: 0.1 + p_decoder_dropout: 0.1 + prenet_dim: 256 + prenet_p_dropout: 0.5 + # Attention parameters + attention_dim: 128 + attention_rnn_dim: 1024 + # AttentionLocation Layer parameters + attention_location_kernel_size: 31 + attention_location_n_filters: 32 - # Location Layer parameters - attention_location_n_filters: 32 - attention_location_kernel_size: 31 +Tacotron2DecoderInfer: + header: {full_spec: nemo.collections.tts.tacotron2_modules.Tacotron2DecoderInfer, + collection_type: tts, collection_version: null, nemo_core_version: 0.9.5} + init_params: *decoder_params Tacotron2Postnet: - n_mel_channels: *n_mels - postnet_embedding_dim: 512 - postnet_kernel_size: 5 - postnet_n_convolutions: 5 + header: {full_spec: nemo.collections.tts.tacotron2_modules.Tacotron2Postnet, + collection_type: tts, collection_version: null, nemo_core_version: 0.9.5} + init_params: + n_mel_channels: *n_mels + p_dropout: 0.5 + postnet_embedding_dim: 512 + postnet_kernel_size: 5 + postnet_n_convolutions: 5 Tacotron2Loss: - pad_value: *pad_value + header: {full_spec: nemo.collections.tts.tacotron2_modules.Tacotron2Loss, + collection_type: tts, collection_version: null, nemo_core_version: 0.9.5} + init_params: + pad_value: *pad_value diff --git a/examples/tts/configs/waveglow.yaml b/examples/tts/configs/waveglow.yaml index b6738fc26baf..7d65a7accb0d 100644 --- a/examples/tts/configs/waveglow.yaml +++ b/examples/tts/configs/waveglow.yaml @@ -2,43 +2,89 @@ model: 
"waveglow" sample_rate: &sr 22050 n_fft: &n_fft 1024 -AudioDataLayer: - sample_rate: *sr - train: +AudioDataLayer_train: + header: {full_spec: nemo.collections.tts.data_layers.AudioDataLayer, + collection_type: tts, collection_version: null, nemo_core_version: 0.9.5} + init_params: + drop_last: false + max_duration: null + min_duration: 0.1 n_segments: 16000 + sample_rate: *sr shuffle: true - eval: + trim_silence: false + # manifest_filepath: ljspeech_train.json # Added by waveglow.py script + # batch_size: 12 + # num_workers: 20 + +AudioDataLayer_eval: + header: {full_spec: nemo.collections.tts.data_layers.AudioDataLayer, + collection_type: tts, collection_version: null, nemo_core_version: 0.9.5} + init_params: + drop_last: false + max_duration: null + min_duration: 0.1 + n_segments: 0 + sample_rate: *sr shuffle: false + trim_silence: false + # manifest_filepath: ljspeech_eval.json # Added by waveglow.py script + # batch_size: 12 + # num_workers: 20 + AudioToMelSpectrogramPreprocessor: - normalize: null - window_size: null - window_stride: null - # Waveglow is currently hardcoded to these values for window size and stride - # Changing these parameters are not recommended - n_window_size: 1024 - n_window_stride: 256 - window: "hann" - features: &n_mels 80 - n_fft: *n_fft - frame_splicing: 1 - dither: 0. - feat_type: "logfbank" - stft_conv: true - sample_rate: *sr - highfreq: 8000 - preemph: null - mag_power: 1. - pad_value: -11.52 - log_zero_guard_type: "clamp" - log_zero_guard_value: 1.0e-5 + header: {full_spec: nemo.collections.asr.audio_preprocessing.AudioToMelSpectrogramPreprocessor, + collection_type: asr, collection_version: null, nemo_core_version: 0.9.5} + init_params: + dither: 0.0 + features: &n_mels 80 + frame_splicing: 1 + highfreq: 8000 + log: true + log_zero_guard_type: clamp + log_zero_guard_value: 1e-05 + lowfreq: 0 + mag_power: 1.0 + n_fft: *n_fft + + # Waveglow is currently hardcoded to these values for window size and stride + # Changing these parameters are not recommended + n_window_size: 1024 + n_window_stride: 256 + + normalize: null + pad_to: 16 + pad_value: -11.52 + preemph: null + sample_rate: *sr + stft_conv: true + window: hann + window_size: null + window_stride: null WaveGlowNM: - n_mel_channels: *n_mels - n_flows: 12 - n_group: 8 - n_early_every: 4 - n_early_size: 2 - n_wn_layers: 8 - n_wn_channels: 512 - wn_kernel_size: 3 + header: {full_spec: nemo.collections.tts.waveglow_modules.WaveGlowNM, + collection_type: tts, collection_version: null, nemo_core_version: 0.9.5} + init_params: &WaveGlowParams + n_early_every: 4 + n_early_size: 2 + n_flows: 12 + n_group: 8 + n_mel_channels: *n_mels + n_wn_channels: 512 + n_wn_layers: 8 + sample_rate: *sr + wn_kernel_size: 3 + +WaveGlowInferNM: + header: {full_spec: nemo.collections.tts.waveglow_modules.WaveGlowInferNM, + collection_type: tts, collection_version: null, nemo_core_version: 0.9.5} + init_params: *WaveGlowParams + +WaveGlowLoss: + header: {full_spec: nemo.collections.tts.waveglow_modules.WaveGlowLoss, + collection_type: tts, collection_version: null, nemo_core_version: 0.9.5} + init_params: + sample_rate: *sr + sigma: 1.0 diff --git a/examples/tts/fastspeech.py b/examples/tts/fastspeech.py new file mode 100644 index 000000000000..15a147fbfd03 --- /dev/null +++ b/examples/tts/fastspeech.py @@ -0,0 +1,142 @@ +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import argparse +import math +import os +from pathlib import Path + +import attrdict +from ruamel import yaml + +import nemo +from nemo.collections import asr as nemo_asr +from nemo.collections import tts as nemo_tts +from nemo.utils import argparse as nm_argparse +from nemo.utils import logging, lr_policies + + +def parse_args(): + parser = argparse.ArgumentParser( + description='FastSpeech training pipeline.', + parents=[nm_argparse.NemoArgParser()], + conflict_handler='resolve', # For parents common flags. + ) + parser.set_defaults( + checkpoint_dir=None, + optimizer='adam', + batch_size=16, + work_dir='fastspeech_output', + eval_batch_size=32, + num_epochs=10, + lr=0.001, + amp_opt_level='O0', + create_tb_writer=True, + lr_policy=None, + weight_decay=1e-6, + ) + + parser.add_argument('--id', type=str, default='default', help="Experiment identificator for clarity.") + parser.add_argument('--durations_dir', type=str, help="Train dataset durations directory path.") + parser.add_argument('--grad_norm_clip', type=float, default=1.0, help="Gradient clipping.") + parser.add_argument('--min_lr', type=float, default=1e-5, help="Minimum learning rate to decay to.") + + args = parser.parse_args() + + return args + + +class FastSpeechGraph: + def __init__(self, args, config, num_workers): + self.data_layer = nemo_tts.FastSpeechDataLayer.import_from_config( + args.model_config, + 'FastSpeechDataLayer', + overwrite_params=dict( + manifest_filepath=args.train_dataset, + durs_dir=args.durations_dir, + bos_id=len(config.labels), + eos_id=len(config.labels) + 1, + pad_id=len(config.labels) + 2, + batch_size=args.batch_size, + num_workers=num_workers, + ), + ) + + self.data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor.import_from_config( + args.model_config, 'AudioToMelSpectrogramPreprocessor', overwrite_params=dict(pad_to=0), + ) + + self.fastspeech = nemo_tts.FastSpeech.import_from_config( + args.model_config, + 'FastSpeech', + overwrite_params=dict(n_src_vocab=len(config.labels) + 3, pad_id=len(config.labels) + 2), + ) + + self.loss = nemo_tts.FastSpeechLoss() + + def build(self): + data = self.data_layer() + mel_true, _ = self.data_preprocessor(input_signal=data.audio, length=data.audio_len) + mel_pred, dur_pred = self.fastspeech( + text=data.text, text_pos=data.text_pos, mel_true=mel_true, dur_true=data.dur_true, + ) + loss = self.loss( + mel_true=mel_true, mel_pred=mel_pred, dur_true=data.dur_true, dur_pred=dur_pred, text_pos=data.text_pos, + ) + + callbacks = [ + nemo.core.SimpleLossLoggerCallback([loss], print_func=lambda x: logging.info(f'Loss: {x[0].data}')) + ] + + return loss, callbacks + + +def main(): + args = parse_args() + work_dir = Path(args.work_dir) / args.id + engine = nemo.core.NeuralModuleFactory( + local_rank=args.local_rank, + optimization_level=args.amp_opt_level, + cudnn_benchmark=args.cudnn_benchmark, + log_dir=work_dir / 'log', + checkpoint_dir=work_dir / 'checkpoints', + tensorboard_dir=work_dir / 'tensorboard', + files_to_copy=[args.model_config], + ) + + yaml_loader = yaml.YAML(typ="safe") + with open(args.model_config) as f: + config = 
attrdict.AttrDict(yaml_loader.load(f)) + logging.info(f'Config: {config}') + graph = FastSpeechGraph(args, config, num_workers=max(int(os.cpu_count() / engine.world_size), 1)) + + steps_per_epoch = math.ceil(len(graph.data_layer) / (args.batch_size * engine.world_size)) + total_steps = args.max_steps if args.max_steps is not None else args.num_epochs * steps_per_epoch + loss, callbacks = graph.build() + engine.train( + tensors_to_optimize=[loss], + optimizer=args.optimizer, + optimization_params=dict( + num_epochs=args.num_epochs, + max_steps=total_steps, + lr=args.lr, + weight_decay=args.weight_decay, + grad_norm_clip=args.grad_norm_clip, + ), + callbacks=callbacks, + lr_policy=lr_policies.CosineAnnealing(total_steps, min_lr=args.min_lr, warmup_steps=4000), + ) + + +if __name__ == '__main__': + main() diff --git a/examples/tts/fastspeech_durations.py b/examples/tts/fastspeech_durations.py new file mode 100644 index 000000000000..ac692e366cf4 --- /dev/null +++ b/examples/tts/fastspeech_durations.py @@ -0,0 +1,164 @@ +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import argparse +import pathlib + +import numpy as np +import tqdm +from ruamel.yaml import YAML +from tacotron2 import create_NMs + +import nemo +import nemo.collections.asr as nemo_asr +import nemo.collections.tts as nemo_tts +from nemo.utils import logging + + +def parse_args(): + parser = argparse.ArgumentParser(description='TTS') + parser.add_argument("--local_rank", default=None, type=int) + parser.add_argument( + "--spec_model", + type=str, + required=True, + choices=["tacotron2"], + help="Model generated to generate spectrograms", + ) + parser.add_argument( + "--spec_model_config", type=str, required=True, help="spec model configuration file: model.yaml", + ) + parser.add_argument( + "--spec_model_load_dir", type=str, required=True, help="directory containing checkpoints for spec model", + ) + parser.add_argument("--eval_dataset", type=str, required=True) + + parser.add_argument( + '--durations_dir', type=str, default='durs', + ) + + parser.add_argument("--batch_size", type=int, default=16) + parser.add_argument("--amp_opt_level", default="O1") + + args = parser.parse_args() + + return args + + +def create_infer_dags( + neural_factory, + neural_modules, + tacotron2_config_file, + tacotron2_params, + infer_dataset, + infer_batch_size, + labels, + cpu_per_dl=1, +): + (data_preprocessor, text_embedding, t2_enc, t2_dec, t2_postnet, _, _) = neural_modules + + data_layer = nemo_asr.AudioToTextDataLayer.import_from_config( + tacotron2_config_file, + "AudioToTextDataLayer_eval", + overwrite_params={ + "manifest_filepath": infer_dataset, + "batch_size": infer_batch_size, + "num_workers": cpu_per_dl, + "bos_id": len(labels), + "eos_id": len(labels) + 1, + "pad_id": len(labels) + 2, + }, + ) + + audio, audio_len, transcript, transcript_len = data_layer() + spec_target, spec_target_len = data_preprocessor(input_signal=audio, length=audio_len) + + transcript_embedded = 
text_embedding(char_phone=transcript) + transcript_encoded = t2_enc(char_phone_embeddings=transcript_embedded, embedding_length=transcript_len,) + if isinstance(t2_dec, nemo_tts.Tacotron2Decoder): + t2_dec.force = True + mel_decoder, gate, alignments = t2_dec( + char_phone_encoded=transcript_encoded, encoded_length=transcript_len, mel_target=spec_target, + ) + else: + raise ValueError("The Neural Module for tacotron2 decoder was not understood") + mel_postnet = t2_postnet(mel_input=mel_decoder) + + return [mel_postnet, gate, alignments, spec_target_len, transcript_len] + + +def main(): + args = parse_args() + neural_factory = nemo.core.NeuralModuleFactory( + optimization_level=args.amp_opt_level, backend=nemo.core.Backend.PyTorch, local_rank=args.local_rank, + ) + + use_cache = True + if args.local_rank is not None: + logging.info("Doing ALL GPU") + use_cache = False + + # Create text to spectrogram model + if args.spec_model == "tacotron2": + yaml = YAML(typ="safe") + with open(args.spec_model_config) as file: + tacotron2_params = yaml.load(file) + spec_neural_modules = create_NMs( + args.spec_model_config, labels=tacotron2_params['labels'], decoder_infer=False + ) + infer_tensors = create_infer_dags( + neural_factory=neural_factory, + neural_modules=spec_neural_modules, + tacotron2_config_file=args.spec_model_config, + tacotron2_params=tacotron2_params, + infer_dataset=args.eval_dataset, + infer_batch_size=args.batch_size, + labels=tacotron2_params['labels'], + ) + + logging.info("Running Tacotron 2") + # Run tacotron 2 + evaluated_tensors = neural_factory.infer( + tensors=infer_tensors, checkpoint_dir=args.spec_model_load_dir, cache=use_cache, offload_to_cpu=True, + ) + + def get_D(alignment, true_len): + D = np.array([0 for _ in range(np.shape(alignment)[1])]) + + for i in range(np.shape(alignment)[0]): + max_index = alignment[i].tolist().index(alignment[i].max()) + D[max_index] = D[max_index] + 1 + + assert D.sum() == alignment.shape[0] + assert D.sum() == true_len + + return D + + # Save durations. + alignments_dir = pathlib.Path(args.durations_dir) + alignments_dir.mkdir(exist_ok=True) + k = -1 + for alignments, mel_lens, text_lens in zip( + tqdm.tqdm(evaluated_tensors[2]), evaluated_tensors[3], evaluated_tensors[4], + ): + for alignment, mel_len, text_len in zip(alignments, mel_lens, text_lens): + alignment = alignment.cpu().numpy() + mel_len = mel_len.cpu().numpy().item() + text_len = text_len.cpu().numpy().item() + dur = get_D(alignment[:mel_len, :text_len], mel_len) + k += 1 + np.save(alignments_dir / f'{k}.npy', dur, allow_pickle=False) + + +if __name__ == '__main__': + main() diff --git a/examples/tts/notebooks/1_Tacotron_inference.ipynb b/examples/tts/notebooks/1_Tacotron_inference.ipynb new file mode 100644 index 000000000000..23b27fecc395 --- /dev/null +++ b/examples/tts/notebooks/1_Tacotron_inference.ipynb @@ -0,0 +1,650 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Copyright 2020 NVIDIA. 
All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "You can run either this notebook locally (if you have all the dependencies and a GPU) or on Google Colab.\n", + "Instructions for setting up Colab are as follows:\n", + "1. Open a new Python 3 notebook.\n", + "2. Import this notebook from GitHub (File -> Upload Notebook -> \"GITHUB\" tab -> copy/paste GitHub URL)\n", + "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", + "4. Run this cell to set up dependencies.\n", + "\"\"\"\n", + "# If you're using Google Colab and not running locally, run this cell.\n", + "!pip install wget\n", + "!pip install nemo_toolkit[tts]\n", + "\n", + "!mkdir configs\n", + "!wget -P configs/ https://raw.githubusercontent.com/NVIDIA/NeMo/master/examples/tts/configs/tacotron2.yaml\n", + "!wget -P configs/ https://raw.githubusercontent.com/NVIDIA/NeMo/master/examples/tts/configs/waveglow.yaml" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import argparse\n", + "import math\n", + "import os\n", + "import copy\n", + "import shutil\n", + "import librosa\n", + "import matplotlib.pyplot as plt\n", + "from functools import partial\n", + "from scipy.io.wavfile import write\n", + "import numpy as np\n", + "import IPython.display as ipd\n", + "\n", + "from ruamel.yaml import YAML\n", + "\n", + "import torch\n", + "import nemo\n", + "import nemo.collections.asr as nemo_asr\n", + "import nemo.collections.tts as nemo_tts\n", + "import nemo.utils.argparse as nm_argparse\n", + "\n", + "logging = nemo.logging" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Download config files\n", + "config_path = '../configs/tacotron2.yaml'\n", + "waveglow_config_path = '../configs/waveglow.yaml'\n", + "\n", + "yaml = YAML(typ=\"safe\")\n", + "with open(config_path) as file:\n", + " tacotron2_config = yaml.load(file)\n", + " labels = tacotron2_config[\"labels\"]\n", + " \n", + "with open(waveglow_config_path) as file:\n", + " waveglow_config = yaml.load(file)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Download pre-trained checkpoints\n", + "\n", + "Note: The checkpoint for WaveGlow is very large (>1GB), so please ensure you have sufficient storage space." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "base_checkpoint_path = './checkpoints/'\n", + "WAVEGLOW = os.path.join(base_checkpoint_path, 'WaveGlowNM.pt')\n", + "TACOTRON_ENCODER = os.path.join(base_checkpoint_path, 'Tacotron2Encoder.pt')\n", + "TACOTRON_DECODER = os.path.join(base_checkpoint_path, 'Tacotron2Decoder.pt')\n", + "TACOTRON_POSTNET = os.path.join(base_checkpoint_path, 'Tacotron2Postnet.pt')\n", + "TEXT_EMBEDDING = os.path.join(base_checkpoint_path, 'TextEmbedding.pt')\n", + "\n", + "if not os.path.exists(base_checkpoint_path):\n", + " os.makedirs(base_checkpoint_path)\n", + " \n", + "if not os.path.exists(WAVEGLOW):\n", + " !wget wget https://api.ngc.nvidia.com/v2/models/nvidia/waveglow_ljspeech/versions/2/files/WaveGlowNM.pt -P {base_checkpoint_path};\n", + "\n", + "if not os.path.exists(TACOTRON_ENCODER):\n", + " !wget https://api.ngc.nvidia.com/v2/models/nvidia/tacotron2_ljspeech/versions/2/files/Tacotron2Encoder.pt -P {base_checkpoint_path};\n", + " \n", + "if not os.path.exists(TACOTRON_DECODER):\n", + " !wget https://api.ngc.nvidia.com/v2/models/nvidia/tacotron2_ljspeech/versions/2/files/Tacotron2Decoder.pt -P {base_checkpoint_path};\n", + "\n", + "if not os.path.exists(TACOTRON_POSTNET):\n", + " !wget https://api.ngc.nvidia.com/v2/models/nvidia/tacotron2_ljspeech/versions/2/files/Tacotron2Postnet.pt -P {base_checkpoint_path};\n", + "\n", + "if not os.path.exists(TEXT_EMBEDDING):\n", + " !wget https://api.ngc.nvidia.com/v2/models/nvidia/tacotron2_ljspeech/versions/2/files/TextEmbedding.pt -P {base_checkpoint_path};\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Prepare the Neural Factory\n", + "neural_factory = nemo.core.NeuralModuleFactory(\n", + " optimization_level=\"O0\", backend=nemo.core.Backend.PyTorch\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Text Line Data Layer\n", + "\n", + "Construct a simple datalayer to load a single line of text (accepted from the user) and pass it to the model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from nemo.backends.pytorch import DataLayerNM\n", + "from nemo.core.neural_types import *\n", + "from nemo.utils.misc import pad_to\n", + "from nemo.collections.asr.parts.dataset import TranscriptDataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class SentenceDataLayer(DataLayerNM):\n", + " \"\"\"A simple Neural Module for loading textual transcript data.\n", + " The path, labels, and eos_id arguments are dataset parameters.\n", + "\n", + " Args:\n", + " pad_id (int): Label position of padding symbol\n", + " batch_size (int): Size of batches to generate in data loader\n", + " drop_last (bool): Whether we drop last (possibly) incomplete batch.\n", + " Defaults to False.\n", + " num_workers (int): Number of processes to work on data loading (0 for\n", + " just main process).\n", + " Defaults to 0.\n", + " \"\"\"\n", + "\n", + " @property\n", + " def output_ports(self):\n", + " \"\"\"Returns definitions of module output ports.\n", + "\n", + " texts:\n", + " 0: AxisType(BatchTag)\n", + "\n", + " 1: AxisType(TimeTag)\n", + "\n", + " texts_length:\n", + " 0: AxisType(BatchTag)\n", + "\n", + " \"\"\"\n", + " return {\n", + " 'texts': NeuralType(('B', 'T'), LabelsType()),\n", + " 'texts_length': NeuralType(tuple('B'), 
LengthsType()),\n", + " }\n", + "\n", + " def __init__(\n", + " self,\n", + " path,\n", + " labels,\n", + " batch_size,\n", + " bos_id=None,\n", + " eos_id=None,\n", + " pad_id=None,\n", + " drop_last=False,\n", + " num_workers=0,\n", + " shuffle=True,\n", + " ):\n", + " super().__init__()\n", + "\n", + " # Set up dataset\n", + " self.dataset_params = {\n", + " 'path': path,\n", + " 'labels': labels,\n", + " 'bos_id': bos_id,\n", + " 'eos_id': eos_id,\n", + " }\n", + "\n", + " self._dataset = TranscriptDataset(**self.dataset_params)\n", + "\n", + " # Set up data loader\n", + " sampler = None\n", + " pad_id = 0 if pad_id is None else pad_id\n", + " \n", + " def update_dataset(self):\n", + " self._dataset = TranscriptDataset(**self.dataset_params)\n", + " logging.info('Dataset updated.')\n", + "\n", + " def __len__(self):\n", + " return len(self._dataset)\n", + "\n", + " @property\n", + " def dataset(self):\n", + " return self._dataset\n", + "\n", + " @property\n", + " def data_iterator(self):\n", + " return None\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Create the Tacotron 2 + WaveGlow Neural Modules" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def create_NMs(tacotron2_config, waveglow_config, labels, decoder_infer=False, waveglow_sigma=0.6):\n", + " data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor(\n", + " **tacotron2_config[\"AudioToMelSpectrogramPreprocessor\"][\"init_params\"]\n", + " )\n", + " \n", + " text_embedding_params = copy.deepcopy(tacotron2_config[\"TextEmbedding\"][\"init_params\"])\n", + " text_embedding_params['n_symbols'] = len(labels) + 3\n", + " \n", + " # Load checkpoint for text embedding\n", + " text_embedding = nemo_tts.TextEmbedding(**text_embedding_params)\n", + " text_embedding.restore_from(TEXT_EMBEDDING)\n", + " \n", + " # Load checkpoint for encoder\n", + " t2_enc = nemo_tts.Tacotron2Encoder(**tacotron2_config[\"Tacotron2Encoder\"][\"init_params\"])\n", + " t2_enc.restore_from(TACOTRON_ENCODER)\n", + " \n", + " # Load checkpoint for decoder\n", + " decoder_params = copy.deepcopy(tacotron2_config[\"Tacotron2Decoder\"][\"init_params\"])\n", + " \n", + " t2_dec = nemo_tts.Tacotron2DecoderInfer(**decoder_params) \n", + " t2_dec.restore_from(TACOTRON_DECODER)\n", + " \n", + " # Load checkpoint for PortNet\n", + " t2_postnet = nemo_tts.Tacotron2Postnet(**tacotron2_config[\"Tacotron2Postnet\"][\"init_params\"])\n", + " t2_postnet.restore_from(TACOTRON_POSTNET)\n", + " \n", + " t2_loss = nemo_tts.Tacotron2Loss(**tacotron2_config[\"Tacotron2Loss\"][\"init_params\"])\n", + " \n", + " makegatetarget = nemo_tts.MakeGate()\n", + "\n", + " total_weights = text_embedding.num_weights + t2_enc.num_weights + t2_dec.num_weights + t2_postnet.num_weights\n", + "\n", + " logging.info('================================')\n", + " logging.info(f\"Total number of parameters (Tacotron 2): {total_weights}\")\n", + " logging.info('================================')\n", + " \n", + " \n", + " # Load WaveGlow model\n", + " waveglow_args = copy.deepcopy(waveglow_config[\"WaveGlowNM\"][\"init_params\"])\n", + " waveglow_args['sigma'] = waveglow_sigma\n", + " \n", + " waveglow = nemo_tts.WaveGlowInferNM(**waveglow_args)\n", + " waveglow.restore_from(WAVEGLOW)\n", + " \n", + " total_weights = waveglow.num_weights\n", + " \n", + " logging.info('================================')\n", + " logging.info(f\"Total number of parameters (WaveGlow): {total_weights}\")\n", + " 
logging.info('================================')\n", + "\n", + " return (\n", + " data_preprocessor,\n", + " text_embedding,\n", + " t2_enc,\n", + " t2_dec,\n", + " t2_postnet,\n", + " t2_loss,\n", + " makegatetarget,\n", + " ), waveglow" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "neural_modules, waveglow = create_NMs(tacotron2_config, waveglow_config, labels, decoder_infer=True, waveglow_sigma=0.6);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Utility functions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def update_text(text):\n", + " if not os.path.exists('cache/'):\n", + " os.makedirs('cache/')\n", + " \n", + " fp = os.path.join('cache', 'input.txt')\n", + " with open(fp, 'w', encoding='utf8') as f:\n", + " f.write('{}\\n'.format(text))\n", + " f.flush()\n", + " \n", + " logging.info(\"Updated input file with value : %s\", text)\n", + " return fp\n", + " \n", + "def cleanup_cachedir():\n", + " if os.path.exists('cache/'):\n", + " shutil.rmtree('cache/')\n", + " logging.info(\"Cleaned up cache directory !\")\n", + " \n", + "def plot_and_save_spec(spectrogram, i, save_dir=None):\n", + " fig, ax = plt.subplots(figsize=(12, 3))\n", + " im = ax.imshow(spectrogram, aspect=\"auto\", origin=\"lower\", interpolation='none')\n", + " plt.colorbar(im, ax=ax)\n", + " plt.xlabel(\"Frames\")\n", + " plt.ylabel(\"Channels\")\n", + " plt.tight_layout()\n", + " save_file = f\"spec_{i}.png\"\n", + " if save_dir:\n", + " save_file = os.path.join(save_dir, save_file)\n", + " plt.savefig(save_file)\n", + " plt.close()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Initializing the inference DAG\n", + "\n", + "To initialize the graph, we accept some text from the user. Later, we will accept the actual text that we want to convert to speech !" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "text = input('Please enter some initial text here :')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "filepath = update_text(text)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create inference DAG" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Tacotron 2 DAG\n", + "(_, text_embedding, t2_enc, t2_dec, t2_postnet, _, _) = neural_modules\n", + "\n", + "data_layer = SentenceDataLayer(\n", + " path=filepath,\n", + " labels=labels,\n", + " batch_size=1,\n", + " num_workers=0,\n", + " bos_id=len(labels),\n", + " eos_id=len(labels) + 1,\n", + " pad_id=len(labels) + 2,\n", + " shuffle=False,\n", + ")\n", + "transcript, transcript_len = data_layer()\n", + "\n", + "transcript_embedded = text_embedding(char_phone=transcript)\n", + "\n", + "transcript_encoded = t2_enc(char_phone_embeddings=transcript_embedded, embedding_length=transcript_len,)\n", + "\n", + "mel_decoder, gate, alignments, mel_len = t2_dec(\n", + " char_phone_encoded=transcript_encoded, encoded_length=transcript_len,\n", + ")\n", + "\n", + "mel_postnet = t2_postnet(mel_input=mel_decoder)\n", + "\n", + "# WaveGlow DAG\n", + "audio_pred = waveglow(mel_spectrogram=mel_postnet)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Setup inference tensors\n", + "infer_tensors = [mel_postnet, gate, alignments, mel_len]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Run inference DAG" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def run_tacotron2():\n", + " logging.info(\"Running Tacotron 2\")\n", + " # Run tacotron 2\n", + " evaluated_tensors = neural_factory.infer(\n", + " tensors=infer_tensors, offload_to_cpu=False\n", + " )\n", + " logging.info(\"Done Running Tacotron 2\")\n", + " \n", + " mel_len_val = evaluated_tensors[-1]\n", + " \n", + " filterbank = librosa.filters.mel(\n", + " sr=tacotron2_config[\"sample_rate\"],\n", + " n_fft=tacotron2_config[\"n_fft\"],\n", + " n_mels=tacotron2_config[\"n_mels\"],\n", + " fmax=tacotron2_config[\"fmax\"],\n", + " )\n", + " \n", + " return evaluated_tensors, filterbank, mel_len_val\n", + "\n", + "def run_waveglow(save_dir, waveglow_denoiser_strength=0.0):\n", + " # Run Tacotron 2 and WaveGlow\n", + " evaluated_tensors, filterbank, mel_len_val = run_tacotron2()\n", + " \n", + " logging.info(\"Running Waveglow\")\n", + " evaluated_tensors = neural_factory.infer(\n", + " tensors=[audio_pred],\n", + " )\n", + " logging.info(\"Done Running Waveglow\")\n", + " \n", + " if waveglow_denoiser_strength > 0:\n", + " logging.info(\"Setup WaveGlow denoiser\")\n", + " waveglow.setup_denoiser()\n", + " \n", + " logging.info(\"Saving results to disk\")\n", + " for i, batch in enumerate(evaluated_tensors[0]):\n", + " audio = batch.cpu().numpy()\n", + " for j, sample in enumerate(audio):\n", + " sample_len = mel_len_val[i][j] * tacotron2_config[\"n_stride\"]\n", + " sample = sample[:sample_len]\n", + " save_file = f\"sample_{i * 32 + j}.wav\"\n", + " if save_dir:\n", + " save_file = os.path.join(save_dir, save_file)\n", + " if waveglow_denoiser_strength > 0:\n", + " sample, spec = waveglow.denoise(sample, strength=waveglow_denoiser_strength)\n", + " else:\n", + " spec, _ 
= librosa.core.magphase(librosa.core.stft(sample, n_fft=waveglow_config[\"n_fft\"]))\n", + " write(save_file, waveglow_config[\"sample_rate\"], sample)\n", + " spec = np.dot(filterbank, spec)\n", + " spec = np.log(np.clip(spec, a_min=1e-5, a_max=None))\n", + " plot_and_save_spec(spec, i * 32 + j, save_dir)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Run Tacotron 2 + WaveGlow on input text" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "text = input('Please enter some initial text here :')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "filepath = update_text(text)\n", + "data_layer.update_dataset()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prepare directories to save results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "savedir = 'results/'\n", + "saved_audio = os.path.join(savedir, 'sample_0.wav')\n", + "saved_spectrogram = os.path.join(savedir, 'spec_0.png')\n", + "\n", + "if not os.path.exists(savedir):\n", + " os.makedirs(savedir)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Generate the audio\n", + "\n", + "Lets run the Tacotron 2 model and send the results to WaveGlow to generate the audio!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run_waveglow(savedir, waveglow_denoiser_strength=0.0)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Lets hear the generated audio !" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ipd.Audio(saved_audio, rate=16000)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ipd.Image(saved_spectrogram)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Cleanup cachedir" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cleanup_cachedir()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.7.6 64-bit ('NeMo': conda)", + "language": "python", + "name": "python37664bitnemoconda43f94a748a2e4953b0129556ecdf4f62" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/examples/tts/tacotron2.py b/examples/tts/tacotron2.py index 332da22e0be5..f87ff213a7ba 100644 --- a/examples/tts/tacotron2.py +++ b/examples/tts/tacotron2.py @@ -1,6 +1,17 @@ -# Copyright (c) 2019 NVIDIA Corporation +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. import argparse -import copy import math import os from functools import partial @@ -17,6 +28,7 @@ tacotron2_process_eval_batch, tacotron2_process_final_eval, ) +from nemo.utils import logging from nemo.utils.lr_policies import CosineAnnealing @@ -37,19 +49,16 @@ def parse_args(): ) # Overwrite default args - parser.add_argument( - "--max_steps", type=int, default=None, required=False, help="max number of steps to train", - ) - parser.add_argument( - "--num_epochs", type=int, default=None, required=False, help="number of epochs to train", - ) - parser.add_argument( - "--model_config", type=str, required=True, help="model configuration file: model.yaml", - ) + parser.add_argument("--max_steps", type=int, default=None, help="max number of steps to train") + parser.add_argument("--num_epochs", type=int, default=None, help="number of epochs to train") + parser.add_argument("--model_config", type=str, required=True, help="model configuration file: model.yaml") parser.add_argument("--grad_norm_clip", type=float, default=1.0, help="gradient clipping") + parser.add_argument("--min_lr", type=float, default=1e-5, help="minimum learning rate to decay to") parser.add_argument( - "--min_lr", type=float, default=1e-5, help="minimum learning rate to decay to", + "--do_not_eval_at_start", action='store_true', help="toggle for whether to do evaluation on step 0" ) + parser.add_argument("--decoder_force", action='store_true', help="toggle for teacher forcing during evaluation") + parser.add_argument("--random_seed", default=None, type=int, help="random seed for torch, numpy, and random") # Create new args parser.add_argument("--exp_name", default="Tacotron2", type=str) @@ -66,7 +75,7 @@ def parse_args(): exp_directory = [ f"{args.exp_name}-lr_{args.lr}-bs_{args.batch_size}", "", - (f"-wd_{args.weight_decay}-opt_{args.optimizer}" f"-ips_{args.iter_per_step}"), + f"-wd_{args.weight_decay}-opt_{args.optimizer}-ips_{args.iter_per_step}", ] if args.max_steps: exp_directory[1] = f"-s_{args.max_steps}" @@ -77,27 +86,30 @@ def parse_args(): return args, "".join(exp_directory) -def create_NMs(tacotron2_params, decoder_infer=False): - data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor( - **tacotron2_params["AudioToMelSpectrogramPreprocessor"] +def create_NMs(tacotron2_config_file, labels, decoder_infer=False, decoder_force=False): + data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor.import_from_config( + tacotron2_config_file, "AudioToMelSpectrogramPreprocessor" ) - text_embedding = nemo_tts.TextEmbedding( - len(tacotron2_params["labels"]) + 3, **tacotron2_params["TextEmbedding"], # + 3 special chars + text_embedding = nemo_tts.TextEmbedding.import_from_config( + tacotron2_config_file, "TextEmbedding", overwrite_params={"n_symbols": len(labels) + 3} ) - t2_enc = nemo_tts.Tacotron2Encoder(**tacotron2_params["Tacotron2Encoder"]) + t2_enc = nemo_tts.Tacotron2Encoder.import_from_config(tacotron2_config_file, "Tacotron2Encoder") if decoder_infer: - t2_dec = nemo_tts.Tacotron2DecoderInfer(**tacotron2_params["Tacotron2Decoder"]) + t2_dec = nemo_tts.Tacotron2DecoderInfer.import_from_config(tacotron2_config_file, "Tacotron2DecoderInfer") else: - t2_dec = nemo_tts.Tacotron2Decoder(**tacotron2_params["Tacotron2Decoder"]) - t2_postnet = nemo_tts.Tacotron2Postnet(**tacotron2_params["Tacotron2Postnet"]) - t2_loss = nemo_tts.Tacotron2Loss(**tacotron2_params["Tacotron2Loss"]) + t2_dec = 
nemo_tts.Tacotron2Decoder.import_from_config( + tacotron2_config_file, "Tacotron2Decoder", overwrite_params={"force": decoder_force} + ) + t2_postnet = nemo_tts.Tacotron2Postnet.import_from_config(tacotron2_config_file, "Tacotron2Postnet") + t2_loss = nemo_tts.Tacotron2Loss.import_from_config(tacotron2_config_file, "Tacotron2Loss") makegatetarget = nemo_tts.MakeGate() total_weights = text_embedding.num_weights + t2_enc.num_weights + t2_dec.num_weights + t2_postnet.num_weights - nemo.logging.info('================================') - nemo.logging.info(f"Total number of parameters: {total_weights}") - nemo.logging.info('================================') + logging.info('================================') + logging.info(f"Total number of parameters: {total_weights}") + logging.info('================================') + return ( data_preprocessor, text_embedding, @@ -112,41 +124,39 @@ def create_NMs(tacotron2_params, decoder_infer=False): def create_train_dag( neural_factory, neural_modules, - tacotron2_params, + tacotron2_config_file, train_dataset, batch_size, log_freq, checkpoint_save_freq, + labels, cpu_per_dl=1, ): - (data_preprocessor, text_embedding, t2_enc, t2_dec, t2_postnet, t2_loss, makegatetarget,) = neural_modules - - train_dl_params = copy.deepcopy(tacotron2_params["AudioToTextDataLayer"]) - train_dl_params.update(tacotron2_params["AudioToTextDataLayer"]["train"]) - del train_dl_params["train"] - del train_dl_params["eval"] - - data_layer = nemo_asr.AudioToTextDataLayer( - manifest_filepath=train_dataset, - labels=tacotron2_params['labels'], - bos_id=len(tacotron2_params['labels']), - eos_id=len(tacotron2_params['labels']) + 1, - pad_id=len(tacotron2_params['labels']) + 2, - batch_size=batch_size, - num_workers=cpu_per_dl, - **train_dl_params, + (data_preprocessor, text_embedding, t2_enc, t2_dec, t2_postnet, t2_loss, makegatetarget) = neural_modules + + data_layer = nemo_asr.AudioToTextDataLayer.import_from_config( + tacotron2_config_file, + "AudioToTextDataLayer_train", + overwrite_params={ + "manifest_filepath": train_dataset, + "batch_size": batch_size, + "num_workers": cpu_per_dl, + "bos_id": len(labels), + "eos_id": len(labels) + 1, + "pad_id": len(labels) + 2, + }, ) N = len(data_layer) steps_per_epoch = math.ceil(N / (batch_size * neural_factory.world_size)) - nemo.logging.info(f'Have {N} examples to train on.') + logging.info(f'Have {N} examples to train on.') # Train DAG audio, audio_len, transcript, transcript_len = data_layer() spec_target, spec_target_len = data_preprocessor(input_signal=audio, length=audio_len) transcript_embedded = text_embedding(char_phone=transcript) - transcript_encoded = t2_enc(char_phone_embeddings=transcript_embedded, embedding_length=transcript_len,) + transcript_encoded = t2_enc(char_phone_embeddings=transcript_embedded, embedding_length=transcript_len) mel_decoder, gate, alignments = t2_dec( char_phone_encoded=transcript_encoded, encoded_length=transcript_len, mel_target=spec_target, ) @@ -165,7 +175,7 @@ def create_train_dag( # Callbacks needed to print info to console and Tensorboard train_callback = nemo.core.SimpleLossLoggerCallback( tensors=[loss_t, spec_target, mel_postnet, gate, gate_target, alignments], - print_func=lambda x: nemo.logging.info(f"Loss: {x[0].data}"), + print_func=lambda x: logging.info(f"Loss: {x[0].data}"), log_to_tb_func=partial(tacotron2_log_to_tb_func, log_images=True, log_images_freq=log_freq), tb_writer=neural_factory.tb_writer, ) @@ -177,34 +187,39 @@ def create_train_dag( def create_eval_dags( - 
neural_factory, neural_modules, tacotron2_params, eval_datasets, eval_batch_size, eval_freq, cpu_per_dl=1, + neural_factory, + neural_modules, + tacotron2_config_file, + eval_datasets, + eval_batch_size, + eval_freq, + labels, + cpu_per_dl=1, + do_not_eval_at_start=False, ): - (data_preprocessor, text_embedding, t2_enc, t2_dec, t2_postnet, t2_loss, makegatetarget,) = neural_modules - - eval_dl_params = copy.deepcopy(tacotron2_params["AudioToTextDataLayer"]) - eval_dl_params.update(tacotron2_params["AudioToTextDataLayer"]["eval"]) - del eval_dl_params["train"] - del eval_dl_params["eval"] + (data_preprocessor, text_embedding, t2_enc, t2_dec, t2_postnet, t2_loss, makegatetarget) = neural_modules callbacks = [] # assemble eval DAGs for eval_dataset in eval_datasets: - data_layer_eval = nemo_asr.AudioToTextDataLayer( - manifest_filepath=eval_dataset, - labels=tacotron2_params['labels'], - bos_id=len(tacotron2_params['labels']), - eos_id=len(tacotron2_params['labels']) + 1, - pad_id=len(tacotron2_params['labels']) + 2, - batch_size=eval_batch_size, - num_workers=cpu_per_dl, - **eval_dl_params, + data_layer_eval = nemo_asr.AudioToTextDataLayer.import_from_config( + tacotron2_config_file, + "AudioToTextDataLayer_eval", + overwrite_params={ + "manifest_filepath": eval_dataset, + "batch_size": eval_batch_size, + "num_workers": cpu_per_dl, + "bos_id": len(labels), + "eos_id": len(labels) + 1, + "pad_id": len(labels) + 2, + }, ) audio, audio_len, transcript, transcript_len = data_layer_eval() spec_target, spec_target_len = data_preprocessor(input_signal=audio, length=audio_len) transcript_embedded = text_embedding(char_phone=transcript) - transcript_encoded = t2_enc(char_phone_embeddings=transcript_embedded, embedding_length=transcript_len,) + transcript_encoded = t2_enc(char_phone_embeddings=transcript_embedded, embedding_length=transcript_len) mel_decoder, gate, alignments = t2_dec( char_phone_encoded=transcript_encoded, encoded_length=transcript_len, mel_target=spec_target, ) @@ -237,6 +252,7 @@ def create_eval_dags( tb_writer_func=partial(tacotron2_eval_log_to_tb_func, tag=tagname), eval_step=eval_freq, tb_writer=neural_factory.tb_writer, + eval_at_start=not do_not_eval_at_start, ) callbacks.append(eval_callback) @@ -246,13 +262,15 @@ def create_eval_dags( def create_all_dags( neural_factory, neural_modules, - tacotron2_params, + tacotron2_config_file, train_dataset, batch_size, eval_freq, + labels, checkpoint_save_freq=None, eval_datasets=None, eval_batch_size=None, + do_not_eval_at_start=False, ): # Calculate num_workers for dataloader cpu_per_dl = max(int(os.cpu_count() / neural_factory.world_size), 1) @@ -260,12 +278,13 @@ def create_all_dags( training_loss, training_callbacks, steps_per_epoch = create_train_dag( neural_factory=neural_factory, neural_modules=neural_modules, - tacotron2_params=tacotron2_params, + tacotron2_config_file=tacotron2_config_file, train_dataset=train_dataset, batch_size=batch_size, log_freq=eval_freq, checkpoint_save_freq=checkpoint_save_freq, cpu_per_dl=cpu_per_dl, + labels=labels, ) eval_callbacks = [] @@ -273,14 +292,16 @@ def create_all_dags( eval_callbacks = create_eval_dags( neural_factory=neural_factory, neural_modules=neural_modules, - tacotron2_params=tacotron2_params, + tacotron2_config_file=tacotron2_config_file, eval_datasets=eval_datasets, eval_batch_size=eval_batch_size, eval_freq=eval_freq, cpu_per_dl=cpu_per_dl, + labels=labels, + do_not_eval_at_start=do_not_eval_at_start, ) else: - nemo.logging.info("There were no val datasets passed") + 
logging.info("There were no val datasets passed") callbacks = training_callbacks + eval_callbacks return training_loss, callbacks, steps_per_epoch @@ -304,28 +325,32 @@ def main(): files_to_copy=[args.model_config, __file__], cudnn_benchmark=args.cudnn_benchmark, tensorboard_dir=args.tensorboard_dir, + random_seed=args.random_seed, ) if args.local_rank is not None: - nemo.logging.info('Doing ALL GPU') + logging.info('Doing ALL GPU') yaml = YAML(typ="safe") with open(args.model_config) as file: tacotron2_params = yaml.load(file) + labels = tacotron2_params["labels"] # instantiate neural modules - neural_modules = create_NMs(tacotron2_params) + neural_modules = create_NMs(args.model_config, labels, decoder_force=args.decoder_force) # build dags train_loss, callbacks, steps_per_epoch = create_all_dags( neural_factory=neural_factory, neural_modules=neural_modules, - tacotron2_params=tacotron2_params, + tacotron2_config_file=args.model_config, train_dataset=args.train_dataset, batch_size=args.batch_size, eval_freq=args.eval_freq, checkpoint_save_freq=args.checkpoint_save_freq, eval_datasets=args.eval_datasets, eval_batch_size=args.eval_batch_size, + labels=labels, + do_not_eval_at_start=args.do_not_eval_at_start, ) # train model diff --git a/examples/tts/tacotron2_v0p9.py b/examples/tts/tacotron2_v0p9.py new file mode 100644 index 000000000000..e6339c5e542a --- /dev/null +++ b/examples/tts/tacotron2_v0p9.py @@ -0,0 +1,360 @@ +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Tacotron2_v0p9.py is functionally the same as tacotron2.py but with support for older model yaml configs. 
This file +will be removed in future versions +""" +import argparse +import copy +import math +import os +from functools import partial + +from ruamel.yaml import YAML + +import nemo +import nemo.collections.asr as nemo_asr +import nemo.collections.tts as nemo_tts +import nemo.utils.argparse as nm_argparse +from nemo.collections.tts import ( + tacotron2_eval_log_to_tb_func, + tacotron2_log_to_tb_func, + tacotron2_process_eval_batch, + tacotron2_process_final_eval, +) +from nemo.utils import logging +from nemo.utils.lr_policies import CosineAnnealing + + +def parse_args(): + parser = argparse.ArgumentParser( + parents=[nm_argparse.NemoArgParser()], description='Tacotron2', conflict_handler='resolve', + ) + parser.set_defaults( + checkpoint_dir=None, + optimizer="adam", + batch_size=48, + eval_batch_size=32, + lr=0.001, + amp_opt_level="O0", + create_tb_writer=True, + lr_policy=None, + weight_decay=1e-6, + ) + + # Overwrite default args + parser.add_argument("--max_steps", type=int, default=None, help="max number of steps to train") + parser.add_argument("--num_epochs", type=int, default=None, help="number of epochs to train") + parser.add_argument("--model_config", type=str, required=True, help="model configuration file: model.yaml") + parser.add_argument("--grad_norm_clip", type=float, default=1.0, help="gradient clipping") + parser.add_argument("--min_lr", type=float, default=1e-5, help="minimum learning rate to decay to") + + # Create new args + parser.add_argument("--exp_name", default="Tacotron2", type=str) + + args = parser.parse_args() + + if args.lr_policy: + raise NotImplementedError("Tacotron 2 does not support lr policy arg") + if args.max_steps is not None and args.num_epochs is not None: + raise ValueError("Either max_steps or num_epochs should be provided.") + if args.eval_freq % 25 != 0: + raise ValueError("eval_freq should be a multiple of 25.") + + exp_directory = [ + f"{args.exp_name}-lr_{args.lr}-bs_{args.batch_size}", + "", + f"-wd_{args.weight_decay}-opt_{args.optimizer}-ips_{args.iter_per_step}", + ] + if args.max_steps: + exp_directory[1] = f"-s_{args.max_steps}" + elif args.num_epochs: + exp_directory[1] = f"-e_{args.num_epochs}" + else: + raise ValueError("Both max_steps and num_epochs were None.") + return args, "".join(exp_directory) + + +def create_NMs(tacotron2_params, decoder_infer=False): + data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor( + **tacotron2_params["AudioToMelSpectrogramPreprocessor"] + ) + text_embedding = nemo_tts.TextEmbedding( + len(tacotron2_params["labels"]) + 3, **tacotron2_params["TextEmbedding"], # + 3 special chars + ) + t2_enc = nemo_tts.Tacotron2Encoder(**tacotron2_params["Tacotron2Encoder"]) + if decoder_infer: + t2_dec = nemo_tts.Tacotron2DecoderInfer(**tacotron2_params["Tacotron2Decoder"]) + else: + t2_dec = nemo_tts.Tacotron2Decoder(**tacotron2_params["Tacotron2Decoder"]) + t2_postnet = nemo_tts.Tacotron2Postnet(**tacotron2_params["Tacotron2Postnet"]) + t2_loss = nemo_tts.Tacotron2Loss(**tacotron2_params["Tacotron2Loss"]) + makegatetarget = nemo_tts.MakeGate() + + total_weights = text_embedding.num_weights + t2_enc.num_weights + t2_dec.num_weights + t2_postnet.num_weights + + logging.info('================================') + logging.info(f"Total number of parameters: {total_weights}") + logging.info('================================') + + return ( + data_preprocessor, + text_embedding, + t2_enc, + t2_dec, + t2_postnet, + t2_loss, + makegatetarget, + ) + + +def create_train_dag( + neural_factory, + neural_modules, + 
tacotron2_params, + train_dataset, + batch_size, + log_freq, + checkpoint_save_freq, + cpu_per_dl=1, +): + (data_preprocessor, text_embedding, t2_enc, t2_dec, t2_postnet, t2_loss, makegatetarget) = neural_modules + + train_dl_params = copy.deepcopy(tacotron2_params["AudioToTextDataLayer"]) + train_dl_params.update(tacotron2_params["AudioToTextDataLayer"]["train"]) + del train_dl_params["train"] + del train_dl_params["eval"] + + data_layer = nemo_asr.AudioToTextDataLayer( + manifest_filepath=train_dataset, + labels=tacotron2_params['labels'], + bos_id=len(tacotron2_params['labels']), + eos_id=len(tacotron2_params['labels']) + 1, + pad_id=len(tacotron2_params['labels']) + 2, + batch_size=batch_size, + num_workers=cpu_per_dl, + **train_dl_params, + ) + + N = len(data_layer) + steps_per_epoch = math.ceil(N / (batch_size * neural_factory.world_size)) + logging.info(f'Have {N} examples to train on.') + + # Train DAG + audio, audio_len, transcript, transcript_len = data_layer() + spec_target, spec_target_len = data_preprocessor(input_signal=audio, length=audio_len) + + transcript_embedded = text_embedding(char_phone=transcript) + transcript_encoded = t2_enc(char_phone_embeddings=transcript_embedded, embedding_length=transcript_len) + mel_decoder, gate, alignments = t2_dec( + char_phone_encoded=transcript_encoded, encoded_length=transcript_len, mel_target=spec_target, + ) + mel_postnet = t2_postnet(mel_input=mel_decoder) + gate_target = makegatetarget(mel_target=spec_target, target_len=spec_target_len) + loss_t = t2_loss( + mel_out=mel_decoder, + mel_out_postnet=mel_postnet, + gate_out=gate, + mel_target=spec_target, + gate_target=gate_target, + target_len=spec_target_len, + seq_len=audio_len, + ) + + # Callbacks needed to print info to console and Tensorboard + train_callback = nemo.core.SimpleLossLoggerCallback( + tensors=[loss_t, spec_target, mel_postnet, gate, gate_target, alignments], + print_func=lambda x: logging.info(f"Loss: {x[0].data}"), + log_to_tb_func=partial(tacotron2_log_to_tb_func, log_images=True, log_images_freq=log_freq), + tb_writer=neural_factory.tb_writer, + ) + + chpt_callback = nemo.core.CheckpointCallback(folder=neural_factory.checkpoint_dir, step_freq=checkpoint_save_freq) + + callbacks = [train_callback, chpt_callback] + return loss_t, callbacks, steps_per_epoch + + +def create_eval_dags( + neural_factory, neural_modules, tacotron2_params, eval_datasets, eval_batch_size, eval_freq, cpu_per_dl=1, +): + (data_preprocessor, text_embedding, t2_enc, t2_dec, t2_postnet, t2_loss, makegatetarget) = neural_modules + + eval_dl_params = copy.deepcopy(tacotron2_params["AudioToTextDataLayer"]) + eval_dl_params.update(tacotron2_params["AudioToTextDataLayer"]["eval"]) + del eval_dl_params["train"] + del eval_dl_params["eval"] + + callbacks = [] + # assemble eval DAGs + for eval_dataset in eval_datasets: + data_layer_eval = nemo_asr.AudioToTextDataLayer( + manifest_filepath=eval_dataset, + labels=tacotron2_params['labels'], + bos_id=len(tacotron2_params['labels']), + eos_id=len(tacotron2_params['labels']) + 1, + pad_id=len(tacotron2_params['labels']) + 2, + batch_size=eval_batch_size, + num_workers=cpu_per_dl, + **eval_dl_params, + ) + + audio, audio_len, transcript, transcript_len = data_layer_eval() + spec_target, spec_target_len = data_preprocessor(input_signal=audio, length=audio_len) + + transcript_embedded = text_embedding(char_phone=transcript) + transcript_encoded = t2_enc(char_phone_embeddings=transcript_embedded, embedding_length=transcript_len) + mel_decoder, gate, alignments 
= t2_dec( + char_phone_encoded=transcript_encoded, encoded_length=transcript_len, mel_target=spec_target, + ) + mel_postnet = t2_postnet(mel_input=mel_decoder) + gate_target = makegatetarget(mel_target=spec_target, target_len=spec_target_len) + loss = t2_loss( + mel_out=mel_decoder, + mel_out_postnet=mel_postnet, + gate_out=gate, + mel_target=spec_target, + gate_target=gate_target, + target_len=spec_target_len, + seq_len=audio_len, + ) + + # create corresponding eval callback + tagname = os.path.basename(eval_dataset).split(".")[0] + eval_tensors = [ + loss, + spec_target, + mel_postnet, + gate, + gate_target, + alignments, + ] + eval_callback = nemo.core.EvaluatorCallback( + eval_tensors=eval_tensors, + user_iter_callback=tacotron2_process_eval_batch, + user_epochs_done_callback=partial(tacotron2_process_final_eval, tag=tagname), + tb_writer_func=partial(tacotron2_eval_log_to_tb_func, tag=tagname), + eval_step=eval_freq, + tb_writer=neural_factory.tb_writer, + ) + + callbacks.append(eval_callback) + return callbacks + + +def create_all_dags( + neural_factory, + neural_modules, + tacotron2_params, + train_dataset, + batch_size, + eval_freq, + checkpoint_save_freq=None, + eval_datasets=None, + eval_batch_size=None, +): + # Calculate num_workers for dataloader + cpu_per_dl = max(int(os.cpu_count() / neural_factory.world_size), 1) + + training_loss, training_callbacks, steps_per_epoch = create_train_dag( + neural_factory=neural_factory, + neural_modules=neural_modules, + tacotron2_params=tacotron2_params, + train_dataset=train_dataset, + batch_size=batch_size, + log_freq=eval_freq, + checkpoint_save_freq=checkpoint_save_freq, + cpu_per_dl=cpu_per_dl, + ) + + eval_callbacks = [] + if eval_datasets: + eval_callbacks = create_eval_dags( + neural_factory=neural_factory, + neural_modules=neural_modules, + tacotron2_params=tacotron2_params, + eval_datasets=eval_datasets, + eval_batch_size=eval_batch_size, + eval_freq=eval_freq, + cpu_per_dl=cpu_per_dl, + ) + else: + logging.info("There were no val datasets passed") + + callbacks = training_callbacks + eval_callbacks + return training_loss, callbacks, steps_per_epoch + + +def main(): + args, name = parse_args() + + log_dir = name + if args.work_dir: + log_dir = os.path.join(args.work_dir, name) + + # instantiate Neural Factory with supported backend + neural_factory = nemo.core.NeuralModuleFactory( + backend=nemo.core.Backend.PyTorch, + local_rank=args.local_rank, + optimization_level=args.amp_opt_level, + log_dir=log_dir, + checkpoint_dir=args.checkpoint_dir, + create_tb_writer=args.create_tb_writer, + files_to_copy=[args.model_config, __file__], + cudnn_benchmark=args.cudnn_benchmark, + tensorboard_dir=args.tensorboard_dir, + ) + + if args.local_rank is not None: + logging.info('Doing ALL GPU') + + yaml = YAML(typ="safe") + with open(args.model_config) as file: + tacotron2_params = yaml.load(file) + # instantiate neural modules + neural_modules = create_NMs(tacotron2_params) + + # build dags + train_loss, callbacks, steps_per_epoch = create_all_dags( + neural_factory=neural_factory, + neural_modules=neural_modules, + tacotron2_params=tacotron2_params, + train_dataset=args.train_dataset, + batch_size=args.batch_size, + eval_freq=args.eval_freq, + checkpoint_save_freq=args.checkpoint_save_freq, + eval_datasets=args.eval_datasets, + eval_batch_size=args.eval_batch_size, + ) + + # train model + total_steps = args.max_steps if args.max_steps is not None else args.num_epochs * steps_per_epoch + neural_factory.train( + tensors_to_optimize=[train_loss], + 
callbacks=callbacks, + lr_policy=CosineAnnealing(total_steps, min_lr=args.min_lr), + optimizer=args.optimizer, + optimization_params={ + "num_epochs": args.num_epochs, + "max_steps": args.max_steps, + "lr": args.lr, + "weight_decay": args.weight_decay, + "grad_norm_clip": args.grad_norm_clip, + }, + batches_per_step=args.iter_per_step, + ) + + +if __name__ == '__main__': + main() diff --git a/examples/tts/tts_infer.py b/examples/tts/tts_infer.py index 81e828d680e8..df95b31063c0 100644 --- a/examples/tts/tts_infer.py +++ b/examples/tts/tts_infer.py @@ -1,6 +1,17 @@ -# Copyright (c) 2019 NVIDIA Corporation +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import argparse -import copy import os import librosa @@ -13,8 +24,7 @@ import nemo import nemo.collections.asr as nemo_asr import nemo.collections.tts as nemo_tts - -logging = nemo.logging +from nemo.utils import logging def parse_args(): @@ -40,7 +50,7 @@ def parse_args(): parser.add_argument( "--vocoder_model_config", type=str, - help=("vocoder model configuration file: model.yaml. Not required for " "griffin-lim."), + help=("vocoder model configuration file: model.yaml. Not required for griffin-lim."), ) parser.add_argument( "--spec_model_load_dir", type=str, required=True, help="directory containing checkpoints for spec model", @@ -48,7 +58,7 @@ def parse_args(): parser.add_argument( "--vocoder_model_load_dir", type=str, - help=("directory containing checkpoints for vocoder model. Not " "required for griffin-lim"), + help=("directory containing checkpoints for vocoder model. Not required for griffin-lim"), ) parser.add_argument("--eval_dataset", type=str, required=True) parser.add_argument("--save_dir", type=str, help="directory to save audio files to") @@ -59,8 +69,8 @@ def parse_args(): type=float, default=2048, help=( - "This is multiplied with the linear spectrogram. This is " - "to avoid audio sounding muted due to mel filter normalization" + "This is multiplied with the linear spectrogram. This is to avoid audio sounding muted due to mel " + "filter normalization" ), ) parser.add_argument( @@ -68,9 +78,8 @@ def parse_args(): type=float, default=1.2, help=( - "The linear spectrogram is raised to this power prior to running" - "the Griffin Lim algorithm. A power of greater than 1 has been " - "shown to improve audio quality." + "The linear spectrogram is raised to this power prior to running the Griffin Lim algorithm. A power of " + "greater than 1 has been shown to improve audio quality." ), ) @@ -79,7 +88,7 @@ def parse_args(): "--waveglow_denoiser_strength", type=float, default=0.0, - help=("denoiser strength for waveglow. Start with 0 and slowly " "increment"), + help="denoiser strength for waveglow. 
Start with 0 and slowly increment", ) parser.add_argument("--waveglow_sigma", type=float, default=0.6) @@ -89,8 +98,8 @@ def parse_args(): args = parser.parse_args() if args.vocoder == "griffin-lim" and (args.vocoder_model_config or args.vocoder_model_load_dir): raise ValueError( - "Griffin-Lim was specified as the vocoder but the a value for " - "vocoder_model_config or vocoder_model_load_dir was passed." + "Griffin-Lim was specified as the vocoder but the a value for vocoder_model_config or " + "vocoder_model_load_dir was passed." ) return args @@ -128,19 +137,19 @@ def plot_and_save_spec(spectrogram, i, save_dir=None): def create_infer_dags( - neural_factory, neural_modules, tacotron2_params, infer_dataset, infer_batch_size, cpu_per_dl=1, + neural_factory, neural_modules, labels, infer_dataset, infer_batch_size, cpu_per_dl=1, ): (_, text_embedding, t2_enc, t2_dec, t2_postnet, _, _) = neural_modules data_layer = nemo_asr.TranscriptDataLayer( path=infer_dataset, - labels=tacotron2_params['labels'], + labels=labels, batch_size=infer_batch_size, num_workers=cpu_per_dl, # load_audio=False, - bos_id=len(tacotron2_params['labels']), - eos_id=len(tacotron2_params['labels']) + 1, - pad_id=len(tacotron2_params['labels']) + 2, + bos_id=len(labels), + eos_id=len(labels) + 1, + pad_id=len(labels) + 2, shuffle=False, ) transcript, transcript_len = data_layer() @@ -174,11 +183,12 @@ def main(): yaml = YAML(typ="safe") with open(args.spec_model_config) as file: tacotron2_params = yaml.load(file) - spec_neural_modules = create_NMs(tacotron2_params, decoder_infer=True) + labels = tacotron2_params["labels"] + spec_neural_modules = create_NMs(args.spec_model_config, labels=labels, decoder_infer=True) infer_tensors = create_infer_dags( neural_factory=neural_factory, neural_modules=spec_neural_modules, - tacotron2_params=tacotron2_params, + labels=labels, infer_dataset=args.eval_dataset, infer_batch_size=args.batch_size, ) @@ -220,11 +230,12 @@ def main(): "Using waveglow as the vocoder requires the " "--vocoder_model_config and --vocoder_model_load_dir args" ) - yaml = YAML(typ="safe") with open(args.vocoder_model_config) as file: waveglow_params = yaml.load(file) - waveglow = nemo_tts.WaveGlowInferNM(sigma=args.waveglow_sigma, **waveglow_params["WaveGlowNM"]) + waveglow = nemo_tts.WaveGlowInferNM.import_from_config( + args.vocoder_model_config, "WaveGlowInferNM", overwrite_params={"sigma": args.waveglow_sigma} + ) audio_pred = waveglow(mel_spectrogram=mel_pred) # waveglow.restore_from(args.vocoder_model_load_dir) diff --git a/examples/tts/waveglow.py b/examples/tts/waveglow.py index 42657137e9eb..63e262099f8a 100644 --- a/examples/tts/waveglow.py +++ b/examples/tts/waveglow.py @@ -1,18 +1,26 @@ -# Copyright (c) 2019 NVIDIA Corporation +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
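# --- Editor's note: illustrative sketch, not part of the diff above. -----------
# The --griffin_lim_mag_scale and --griffin_lim_power flags above scale the linear
# spectrogram and raise it to a power before Griffin-Lim reconstruction. A minimal
# standalone sketch of that post-processing with librosa; the function name, the
# n_iter default and the exact order of the two operations are assumptions.
import numpy as np
import librosa


def griffin_lim_from_linear(linear_spec: np.ndarray, mag_scale: float = 2048.0, power: float = 1.2, n_iter: int = 50):
    """linear_spec: magnitude spectrogram of shape (freq_bins, time_frames)."""
    boosted = (linear_spec * mag_scale) ** power  # counteract muting, sharpen peaks
    return librosa.griffinlim(boosted, n_iter=n_iter)
# -------------------------------------------------------------------------------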
import argparse -import copy import os from functools import partial -from ruamel.yaml import YAML - import nemo import nemo.collections.asr as nemo_asr import nemo.collections.tts as nemo_tts import nemo.utils.argparse as nm_argparse from nemo.collections.tts import waveglow_eval_log_to_tb_func, waveglow_log_to_tb_func, waveglow_process_eval_batch - -logging = nemo.logging +from nemo.utils import logging def parse_args(): @@ -32,15 +40,9 @@ def parse_args(): ) # Overwrite default args - parser.add_argument( - "--max_steps", type=int, default=None, required=False, help="max number of steps to train", - ) - parser.add_argument( - "--num_epochs", type=int, default=None, required=False, help="number of epochs to train", - ) - parser.add_argument( - "--model_config", type=str, required=True, help="model configuration file: model.yaml", - ) + parser.add_argument("--max_steps", type=int, default=None, help="max number of steps to train") + parser.add_argument("--num_epochs", type=int, default=None, help="number of epochs to train") + parser.add_argument("--model_config", type=str, required=True, help="model configuration file: model.yaml") # Create new args parser.add_argument("--exp_name", default="Waveglow", type=str) @@ -57,7 +59,7 @@ def parse_args(): exp_directory = [ f"{args.exp_name}-lr_{args.lr}-bs_{args.batch_size}", "", - (f"-wd_{args.weight_decay}-opt_{args.optimizer}" f"-ips_{args.iter_per_step}"), + (f"-wd_{args.weight_decay}-opt_{args.optimizer}-ips_{args.iter_per_step}"), ] if args.max_steps: exp_directory[1] = f"-s_{args.max_steps}" @@ -68,12 +70,12 @@ def parse_args(): return args, "".join(exp_directory) -def create_NMs(waveglow_params): - data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor( - **waveglow_params["AudioToMelSpectrogramPreprocessor"] +def create_NMs(waveglow_config_file): + data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor.import_from_config( + waveglow_config_file, "AudioToMelSpectrogramPreprocessor" ) - waveglow = nemo_tts.WaveGlowNM(**waveglow_params["WaveGlowNM"]) - waveglow_loss = nemo_tts.WaveGlowLoss() + waveglow = nemo_tts.WaveGlowNM.import_from_config(waveglow_config_file, "WaveGlowNM") + waveglow_loss = nemo_tts.WaveGlowLoss.import_from_config(waveglow_config_file, "WaveGlowLoss") logging.info('================================') logging.info(f"Total number of parameters: {waveglow.num_weights}") @@ -82,17 +84,19 @@ def create_NMs(waveglow_params): def create_train_dag( - neural_factory, neural_modules, waveglow_params, train_dataset, batch_size, checkpoint_save_freq, cpu_per_dl=1, + neural_factory, + neural_modules, + waveglow_config_file, + train_dataset, + batch_size, + checkpoint_save_freq, + cpu_per_dl=1, ): data_preprocessor, waveglow, waveglow_loss = neural_modules - - train_dl_params = copy.deepcopy(waveglow_params["AudioDataLayer"]) - train_dl_params.update(waveglow_params["AudioDataLayer"]["train"]) - del train_dl_params["train"] - del train_dl_params["eval"] - - data_layer = nemo_tts.AudioDataLayer( - manifest_filepath=train_dataset, batch_size=batch_size, num_workers=cpu_per_dl, **train_dl_params, + data_layer = nemo_tts.AudioDataLayer.import_from_config( + waveglow_config_file, + "AudioDataLayer_train", + overwrite_params={"manifest_filepath": train_dataset, "batch_size": batch_size, "num_workers": cpu_per_dl}, ) N = len(data_layer) @@ -121,20 +125,21 @@ def create_train_dag( def create_eval_dags( - neural_factory, neural_modules, waveglow_params, eval_datasets, eval_batch_size, eval_freq, cpu_per_dl=1, + 
neural_factory, neural_modules, waveglow_config_file, eval_datasets, eval_batch_size, eval_freq, cpu_per_dl=1, ): data_preprocessor, waveglow, _ = neural_modules - eval_dl_params = copy.deepcopy(waveglow_params["AudioDataLayer"]) - eval_dl_params.update(waveglow_params["AudioDataLayer"]["eval"]) - del eval_dl_params["train"] - del eval_dl_params["eval"] - callbacks = [] # assemble eval DAGs for eval_dataset in eval_datasets: - data_layer_eval = nemo_tts.AudioDataLayer( - manifest_filepath=eval_dataset, batch_size=eval_batch_size, num_workers=cpu_per_dl, **eval_dl_params, + data_layer_eval = nemo_tts.AudioDataLayer.import_from_config( + waveglow_config_file, + "AudioDataLayer_eval", + overwrite_params={ + "manifest_filepath": eval_dataset, + "batch_size": eval_batch_size, + "num_workers": cpu_per_dl, + }, ) audio, audio_len, = data_layer_eval() @@ -160,7 +165,7 @@ def create_eval_dags( def create_all_dags( neural_factory, neural_modules, - waveglow_params, + waveglow_config_file, train_dataset, batch_size, checkpoint_save_freq, @@ -174,7 +179,7 @@ def create_all_dags( training_loss, training_callbacks, steps_per_epoch = create_train_dag( neural_factory=neural_factory, neural_modules=neural_modules, - waveglow_params=waveglow_params, + waveglow_config_file=waveglow_config_file, train_dataset=train_dataset, batch_size=batch_size, checkpoint_save_freq=checkpoint_save_freq, @@ -186,7 +191,7 @@ def create_all_dags( eval_callbacks = create_eval_dags( neural_factory=neural_factory, neural_modules=neural_modules, - waveglow_params=waveglow_params, + waveglow_config_file=waveglow_config_file, eval_datasets=eval_datasets, eval_batch_size=eval_batch_size, eval_freq=eval_freq, @@ -222,17 +227,14 @@ def main(): if args.local_rank is not None: logging.info('Doing ALL GPU') - yaml = YAML(typ="safe") - with open(args.model_config) as file: - waveglow_params = yaml.load(file) # instantiate neural modules - neural_modules = create_NMs(waveglow_params) + neural_modules = create_NMs(args.model_config) # build dags train_loss, callbacks, steps_per_epoch = create_all_dags( neural_factory=neural_factory, neural_modules=neural_modules, - waveglow_params=waveglow_params, + waveglow_config_file=args.model_config, train_dataset=args.train_dataset, batch_size=args.batch_size, checkpoint_save_freq=args.checkpoint_save_freq, diff --git a/examples/tts/waveglow_v0p9.py b/examples/tts/waveglow_v0p9.py new file mode 100644 index 000000000000..2bc905ea6973 --- /dev/null +++ b/examples/tts/waveglow_v0p9.py @@ -0,0 +1,270 @@ +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Waveglow_v0p9.py is functionally the same as waveglow.py but with support for older model yaml configs. 
This file +will be removed in future versions +""" +import argparse +import copy +import os +from functools import partial + +from ruamel.yaml import YAML + +import nemo +import nemo.collections.asr as nemo_asr +import nemo.collections.tts as nemo_tts +import nemo.utils.argparse as nm_argparse +from nemo.collections.tts import waveglow_eval_log_to_tb_func, waveglow_log_to_tb_func, waveglow_process_eval_batch +from nemo.utils import logging + + +def parse_args(): + parser = argparse.ArgumentParser( + parents=[nm_argparse.NemoArgParser()], description='Waveglow', conflict_handler='resolve', + ) + parser.set_defaults( + checkpoint_dir=None, + optimizer="adam", + batch_size=12, + eval_batch_size=12, + lr=0.0001, + amp_opt_level="O1", + create_tb_writer=True, + lr_policy=None, + weight_decay=1e-6, + ) + + # Overwrite default args + parser.add_argument("--max_steps", type=int, default=None, help="max number of steps to train") + parser.add_argument("--num_epochs", type=int, default=None, help="number of epochs to train") + parser.add_argument("--model_config", type=str, required=True, help="model configuration file: model.yaml") + + # Create new args + parser.add_argument("--exp_name", default="Waveglow", type=str) + + args = parser.parse_args() + + if args.lr_policy: + raise NotImplementedError("Waveglow does not support lr policy arg") + if args.max_steps is not None and args.num_epochs is not None: + raise ValueError("Either max_steps or num_epochs should be provided.") + if args.eval_freq % 25 != 0: + raise ValueError("eval_freq should be a multiple of 25.") + + exp_directory = [ + f"{args.exp_name}-lr_{args.lr}-bs_{args.batch_size}", + "", + (f"-wd_{args.weight_decay}-opt_{args.optimizer}-ips_{args.iter_per_step}"), + ] + if args.max_steps: + exp_directory[1] = f"-s_{args.max_steps}" + elif args.num_epochs: + exp_directory[1] = f"-e_{args.num_epochs}" + else: + raise ValueError("Both max_steps and num_epochs were None.") + return args, "".join(exp_directory) + + +def create_NMs(waveglow_params): + data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor( + **waveglow_params["AudioToMelSpectrogramPreprocessor"] + ) + waveglow = nemo_tts.WaveGlowNM(22050, **waveglow_params["WaveGlowNM"]) + waveglow_loss = nemo_tts.WaveGlowLoss(sample_rate=22050) + + logging.info('================================') + logging.info(f"Total number of parameters: {waveglow.num_weights}") + logging.info('================================') + return (data_preprocessor, waveglow, waveglow_loss) + + +def create_train_dag( + neural_factory, neural_modules, waveglow_params, train_dataset, batch_size, checkpoint_save_freq, cpu_per_dl=1, +): + data_preprocessor, waveglow, waveglow_loss = neural_modules + + train_dl_params = copy.deepcopy(waveglow_params["AudioDataLayer"]) + train_dl_params.update(waveglow_params["AudioDataLayer"]["train"]) + del train_dl_params["train"] + del train_dl_params["eval"] + + data_layer = nemo_tts.AudioDataLayer( + manifest_filepath=train_dataset, batch_size=batch_size, num_workers=cpu_per_dl, **train_dl_params, + ) + + N = len(data_layer) + steps_per_epoch = int(N / (batch_size * neural_factory.world_size)) + logging.info('Have {0} examples to train on.'.format(N)) + + # Train DAG + audio, audio_len, = data_layer() + spec_target, spec_target_len = data_preprocessor(input_signal=audio, length=audio_len) + + z, log_s_list, log_det_W_list = waveglow(mel_spectrogram=spec_target, audio=audio) + loss_t = waveglow_loss(z=z, log_s_list=log_s_list, log_det_W_list=log_det_W_list) + + # Callbacks 
needed to print info to console and Tensorboard + train_callback = nemo.core.SimpleLossLoggerCallback( + tensors=[loss_t, z, spec_target, spec_target_len], + print_func=lambda x: logging.info(f"Loss: {x[0].data}"), + log_to_tb_func=partial(waveglow_log_to_tb_func, log_images=False), + tb_writer=neural_factory.tb_writer, + ) + + chpt_callback = nemo.core.CheckpointCallback(folder=neural_factory.checkpoint_dir, step_freq=checkpoint_save_freq) + + callbacks = [train_callback, chpt_callback] + return loss_t, callbacks, steps_per_epoch + + +def create_eval_dags( + neural_factory, neural_modules, waveglow_params, eval_datasets, eval_batch_size, eval_freq, cpu_per_dl=1, +): + data_preprocessor, waveglow, _ = neural_modules + + eval_dl_params = copy.deepcopy(waveglow_params["AudioDataLayer"]) + eval_dl_params.update(waveglow_params["AudioDataLayer"]["eval"]) + del eval_dl_params["train"] + del eval_dl_params["eval"] + + callbacks = [] + # assemble eval DAGs + for eval_dataset in eval_datasets: + data_layer_eval = nemo_tts.AudioDataLayer( + manifest_filepath=eval_dataset, batch_size=eval_batch_size, num_workers=cpu_per_dl, **eval_dl_params, + ) + + audio, audio_len, = data_layer_eval() + spec_target, spec_target_len = data_preprocessor(input_signal=audio, length=audio_len) + + audio_pred, log_s_list, log_det_W_list = waveglow(mel_spectrogram=spec_target, audio=audio) + + # create corresponding eval callback + tagname = os.path.basename(eval_dataset).split(".")[0] + eval_callback = nemo.core.EvaluatorCallback( + eval_tensors=[audio_pred, spec_target, spec_target_len], + user_iter_callback=waveglow_process_eval_batch, + user_epochs_done_callback=lambda x: x, + tb_writer_func=partial(waveglow_eval_log_to_tb_func, tag=tagname, mel_fb=data_preprocessor.filter_banks,), + eval_step=eval_freq, + tb_writer=neural_factory.tb_writer, + ) + + callbacks.append(eval_callback) + return callbacks + + +def create_all_dags( + neural_factory, + neural_modules, + waveglow_params, + train_dataset, + batch_size, + checkpoint_save_freq, + eval_datasets=None, + eval_batch_size=None, + eval_freq=None, +): + # Calculate num_workers for dataloader + cpu_per_dl = max(int(os.cpu_count() / neural_factory.world_size), 1) + + training_loss, training_callbacks, steps_per_epoch = create_train_dag( + neural_factory=neural_factory, + neural_modules=neural_modules, + waveglow_params=waveglow_params, + train_dataset=train_dataset, + batch_size=batch_size, + checkpoint_save_freq=checkpoint_save_freq, + cpu_per_dl=cpu_per_dl, + ) + + eval_callbacks = [] + if eval_datasets: + eval_callbacks = create_eval_dags( + neural_factory=neural_factory, + neural_modules=neural_modules, + waveglow_params=waveglow_params, + eval_datasets=eval_datasets, + eval_batch_size=eval_batch_size, + eval_freq=eval_freq, + cpu_per_dl=cpu_per_dl, + ) + else: + logging.info("There were no val datasets passed") + + callbacks = training_callbacks + eval_callbacks + return training_loss, callbacks, steps_per_epoch + + +def main(): + args, name = parse_args() + + log_dir = name + if args.work_dir: + log_dir = os.path.join(args.work_dir, name) + + # instantiate Neural Factory with supported backend + neural_factory = nemo.core.NeuralModuleFactory( + backend=nemo.core.Backend.PyTorch, + local_rank=args.local_rank, + optimization_level=args.amp_opt_level, + log_dir=log_dir, + checkpoint_dir=args.checkpoint_dir, + create_tb_writer=args.create_tb_writer, + files_to_copy=[args.model_config, __file__], + cudnn_benchmark=args.cudnn_benchmark, + 
tensorboard_dir=args.tensorboard_dir, + ) + + if args.local_rank is not None: + logging.info('Doing ALL GPU') + + yaml = YAML(typ="safe") + with open(args.model_config) as file: + waveglow_params = yaml.load(file) + # instantiate neural modules + neural_modules = create_NMs(waveglow_params) + + # build dags + train_loss, callbacks, steps_per_epoch = create_all_dags( + neural_factory=neural_factory, + neural_modules=neural_modules, + waveglow_params=waveglow_params, + train_dataset=args.train_dataset, + batch_size=args.batch_size, + checkpoint_save_freq=args.checkpoint_save_freq, + eval_datasets=args.eval_datasets, + eval_batch_size=args.eval_batch_size, + eval_freq=args.eval_freq, + ) + + # train model + neural_factory.train( + tensors_to_optimize=[train_loss], + callbacks=callbacks, + optimizer=args.optimizer, + optimization_params={ + "num_epochs": args.num_epochs, + "max_steps": args.max_steps, + "lr": args.lr, + "weight_decay": args.weight_decay, + "grad_norm_clip": None, + }, + batches_per_step=args.iter_per_step, + ) + + +if __name__ == '__main__': + main() diff --git a/nemo/__init__.py b/nemo/__init__.py index f52c9bd66194..6025cf2815bc 100644 --- a/nemo/__init__.py +++ b/nemo/__init__.py @@ -33,9 +33,7 @@ ) if "NEMO_PACKAGE_BUILDING" not in os.environ: - from nemo.utils.nemo_logging import Logger as _Logger - - logging = _Logger() + from nemo.utils import logging, logging_mode from nemo import backends from nemo import core diff --git a/nemo/backends/__init__.py b/nemo/backends/__init__.py index d4611fa8af89..dfb329d0d74e 100644 --- a/nemo/backends/__init__.py +++ b/nemo/backends/__init__.py @@ -1 +1,22 @@ -from . import pytorch +# ============================================================================= +# Copyright (c) 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +from .load_backend import backend + +# Load backend specific classes, functions etc. +if backend() == 'pytorch': + from .torch_backend import save, load, get_state_dict, set_state_dict + from . import pytorch diff --git a/nemo/backends/load_backend.py b/nemo/backends/load_backend.py new file mode 100644 index 000000000000..8b1c2db802af --- /dev/null +++ b/nemo/backends/load_backend.py @@ -0,0 +1,26 @@ +# ============================================================================= +# Copyright (c) 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
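# --- Editor's note: illustrative sketch, not part of the diff above. -----------
# nemo/backends/__init__.py above now keys its imports off the backend() helper
# that this new load_backend.py defines just below. A hedged usage sketch; apart
# from backend() and the re-exported torch helpers named in the diff, nothing
# here is asserted about the real API.
from nemo.backends import backend

if backend() == 'pytorch':
    # PyTorch-specific checkpoint helpers re-exported by nemo.backends
    from nemo.backends import save, load, get_state_dict, set_state_dict
# -------------------------------------------------------------------------------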
+# ============================================================================= + +# Set the default backend to PyTorch. +_BACKEND = 'pytorch' + + +def backend() -> str: + """ + Returns: + Name of the currently used backend. + """ + return _BACKEND diff --git a/nemo/backends/pytorch/actions.py b/nemo/backends/pytorch/actions.py index 95ed41e553cf..d77b34b27acd 100644 --- a/nemo/backends/pytorch/actions.py +++ b/nemo/backends/pytorch/actions.py @@ -20,15 +20,16 @@ from nemo.backends.pytorch.nm import DataLayerNM, TrainableNM from nemo.backends.pytorch.optimizers import AdamW, Novograd, master_params from nemo.core import DeploymentFormat, DeviceType, NeuralModule, NmTensor -from nemo.core.callbacks import ActionCallback, EvaluatorCallback, SimpleLossLoggerCallback -from nemo.core.neural_factory import Actions, ModelMode, Optimization -from nemo.core.neural_types import * +from nemo.core.actions import Actions, TrainingState, topological_sort_from_leaves +from nemo.core.callbacks import ActionCallback, NeMoCallback, SimpleLossLoggerCallback +from nemo.core.neural_factory import OperationMode, Optimization +from nemo.core.neural_types import AxisKind, NeuralType +from nemo.utils.app_state import AppState +from nemo.utils.decorators import deprecated from nemo.utils.helpers import get_checkpoint_from_dir # these imports will happen on as-needed basis amp = None -# convert_syncbn = None -# create_syncbn_process_group = None LARC = None FusedLAMB = None FusedAdam = None @@ -60,16 +61,12 @@ def __init__( global amp amp = importlib.import_module('apex.amp') if local_rank is not None: - # global convert_syncbn - # global create_syncbn_process_group global LARC global FusedLAMB global FusedAdam global FusedNovoGrad parallel = importlib.import_module('apex.parallel') apex_optimizer = importlib.import_module('apex.optimizers') - # convert_syncbn = parallel.convert_syncbn_model - # create_syncbn_process_group = parallel.create_syncbn_process_group LARC = parallel.LARC FusedLAMB = apex_optimizer.FusedLAMB FusedAdam = apex_optimizer.FusedAdam @@ -87,136 +84,71 @@ def __init__( local_rank=local_rank, global_rank=global_rank, optimization_level=optimization_level, ) - # will be [unique_instance_id -> (NMModule, PTModule)] - self.module_reference_table = {} - self.step = 0 - self.epoch_num = 0 - self.optimizers = [] + self._step = 0 + self._epoch = 0 + self._optimizers = [] self.tb_writer = tb_writer - self._modules = set() self.cache = None self.amp_initialized = False + self.ddp_initialized = False + self.ddp_module_dict = {} + self._train_called = False @property - def modules(self): - return self._modules + def step(self): + return self._step - def __get_top_sorted_modules_and_dataloader(self, hook): - """ - Constructs DAG leading to hook and creates its topological order. - It also populates self.module_reference_table. 
- Args: - hook: an NmTensor or a list of NmTensors representing leaf nodes - in DAG + @step.setter + def step(self, step): + self._step = step - Returns: - list of modules with their call arguments and outputs, and dataset - """ + @property + def epoch(self): + return self._epoch - def create_node(producer, producer_args): - if producer_args is None: - return tuple((producer, ())) - else: - return tuple((producer, tuple([(k, v) for k, v in producer_args.items()]),)) - - def is_in_degree_zero(node, processed_nodes): - """A node has in degree of zero""" - if node[1] == (): - return True - for portname, nmtensor in node[1]: - nd = create_node(nmtensor.producer, nmtensor.producer_args) - if nd not in processed_nodes: - return False - return True - - hooks = hook if isinstance(hook, list) else [hook] - - # ensures that no tensors are processed twice - processed_nmtensors = set() - - indices_to_remove = [] - # Check for duplicates in hook - for i, nmtensor in enumerate(hook): - if nmtensor in processed_nmtensors: - indices_to_remove.append(i) - else: - processed_nmtensors.add(nmtensor) - - for i in reversed(indices_to_remove): - hook.pop(i) - - _top_sorted_modules = [] - all_nodes = {} - - # extract all nodes to all_nodes set - hooks_lst = list(hooks) - while len(hooks_lst) > 0: - # take nmtensor from the end of the list - nmtensor = hooks_lst.pop() - node = create_node(nmtensor.producer, nmtensor.producer_args) - # Store nmtensor as an output of its producer - # first make sure all keys are present per output port - # and nm is inside all_nodes - if node not in all_nodes: - all_nodes[node] = {k: None for k in nmtensor.producer.output_ports} - # second, populate output port with current nmtensor - # where applicable - all_nodes[node][nmtensor.name] = nmtensor - processed_nmtensors.add(nmtensor) - if nmtensor.producer_args is not None and nmtensor.producer_args != {}: - for _, new_nmtensor in nmtensor.producer_args.items(): - if new_nmtensor not in processed_nmtensors: - # put in the start of list - hooks_lst.insert(0, new_nmtensor) - - all_node_with_output = [] - # Iterate over all_nodes to create new nodes that include its output - # now all nodes have (module, input tensors, output tensors) - for node in all_nodes: - all_node_with_output.append(tuple((node[0], node[1], all_nodes[node]))) - - processed_nodes = [] - while len(all_node_with_output) > 0: - for node in all_node_with_output.copy(): - # if node's in_degree is zero it can be added to - # _top_sorted_modules - # this will also reduce in_degree of its children - if is_in_degree_zero(node, processed_nodes): - _top_sorted_modules.append(node) - processed_nodes.append((node[0], node[1])) - all_node_with_output.remove(node) - - # Create top_sorted_modules aka callchain - top_sorted_modules = [] - for i, m in enumerate(_top_sorted_modules): - top_sorted_modules.append((m[0], dict(m[1]), m[2])) - # Ensure that there is only one dataset in callchain - if i > 0 and isinstance(m[0], DataLayerNM): - raise ValueError("There were more than one DataLayer NeuralModule inside " "your DAG.") + @epoch.setter + def epoch(self, epoch): + self._epoch = epoch + + @property + @deprecated(version="0.12", explanation="epoch_num has been deprecated in favour of epoch.") + def epoch_num(self): + return self._epoch + + @epoch_num.setter + @deprecated(version="0.12", explanation="epoch_num has been deprecated in favour of epoch.") + def epoch_num(self): + return self._epoch + + @property + def optimizers(self): + return self._optimizers + + def 
__get_top_sorted_modules_and_dataloader(self, hook: List[NmTensor]): + """A function that accepts a list of NmTensors that need to be computed and constructs a call DAG that starts + from a datalayerNM and can be used to compute the NmTensors. + + args: + leaf_nmtensors (List[NmTensors]): The tensors to be computed + + returns: + top_sorted_modules: the callchain DAG + tdataset: the datalayer at the top of the callchain + """ + top_sorted_modules = topological_sort_from_leaves(hook) if not isinstance(top_sorted_modules[0][0], DataLayerNM): - raise ValueError("The first module in your DAG was not a DataLayer " "NeuralModule.") + raise ValueError("The first module in your DAG was not a DataLayer NeuralModule.") tdataset = top_sorted_modules[0][0].dataset - # populate self.module_reference_table for m in top_sorted_modules: if m[0].factory is None and self._local_rank is not None: raise ValueError( - "Neural module {0} was created without " - "NeuralModuleFactory, but you are trying to" - "run in distributed mode. Please instantiate" - "NeuralModuleFactory first and pass its " - "instance as `factory` parameter to all your" - "Neural Module objects." - "".format(str(m[0])) + "Neural module {0} was created without NeuralModuleFactory, but you are trying to run in " + "distributed mode. Please instantiate NeuralModuleFactory first and pass its instance as " + "`factory` parameter to all your Neural Module objects.".format(str(m[0])) ) - key = m[0].unique_instance_id - if key not in self.module_reference_table: - if isinstance(m[0], TrainableNeuralModuleWrapper): - self.module_reference_table[key] = (m[0], m[0]._pt_module) - else: - self.module_reference_table[key] = (m[0], m[0]) return top_sorted_modules, tdataset @@ -243,7 +175,7 @@ def create_optimizer(self, optimizer, things_to_optimize, optimizer_params=None) elif isinstance(optimizer, torch.optim.Optimizer): optimizer_instance = optimizer else: - raise ValueError("`optimizer` must be a string or an instance " "of torch.optim.Optimizer") + raise ValueError("`optimizer` must be a string or an instance of torch.optim.Optimizer") modules_to_optimize = [] tensors_to_optimize = [] @@ -256,7 +188,7 @@ def create_optimizer(self, optimizer, things_to_optimize, optimizer_params=None) tensors_to_optimize.append(thing) else: raise ValueError( - "{} passed to create_optimizer() was neither " "a neural module nor a neural module tensor" + "{} passed to create_optimizer() was neither a neural module nor a neural module tensor" ) if tensors_to_optimize: @@ -280,7 +212,7 @@ def create_optimizer(self, optimizer, things_to_optimize, optimizer_params=None) params_to_optimize=params_to_optimize, ) - self.optimizers.append(optimizer) + self._optimizers.append(optimizer) return optimizer @staticmethod @@ -303,7 +235,14 @@ def __setup_optimizer( params=params_to_optimize, lr=lr, betas=optimization_params.get("betas", (0.9, 0.999)), ) elif optimizer_class.lower() == "fused_adam": - optimizer = FusedAdam(params=params_to_optimize, lr=lr) + if not FusedAdam: + raise ValueError("FusedAdam works only with torch DDP.") + optimizer = FusedAdam( + params=params_to_optimize, + lr=lr, + weight_decay=optimization_params.get("weight_decay", 0.0), + betas=optimization_params.get("betas", (0.9, 0.999)), + ) elif optimizer_class.lower() == "adam_w": optimizer = AdamW( params=params_to_optimize, @@ -321,6 +260,8 @@ def __setup_optimizer( betas=optimization_params.get("betas", (0.95, 0.25)), ) elif optimizer_class.lower() == "fused_novograd": + if not FusedNovoGrad: + raise 
ValueError("FusedNovoGrad works only with torch DDP.") optimizer = FusedNovoGrad( params_to_optimize, lr=lr, @@ -330,21 +271,24 @@ def __setup_optimizer( betas=optimization_params.get("betas", (0.95, 0.25)), ) elif optimizer_class.lower() == "fused_lamb": + if not FusedLAMB: + raise ValueError("FusedLAMB works only with torch DDP.") optimizer = FusedLAMB(params_to_optimize, lr=lr,) else: raise ValueError("Unknown optimizer class: {0}".format(optimizer_class)) if optimization_params.get("larc", False): + if not LARC: + raise ValueError("LARC works only with torch DDP.") logging.info("Enabling larc") optimizer = LARC(optimizer, trust_coefficient=optimization_params.get("larc_eta", 2e-2),) else: logging.info("Optimizer instance: {0} is provided.") if optimizer_class is not None and optimizer_class != "": - logging.warning("Ignoring `optimizer_class` parameter because" "`optimizer_instance` is provided") + logging.warning("Ignoring `optimizer_class` parameter because `optimizer_instance` is provided") if optimization_params is not None and optimization_params != {}: logging.warning( - "Ignoring `optimization_params` parameter for " - "optimizer because `optimizer_instance` is provided" + "Ignoring `optimization_params` parameter for optimizer because `optimizer_instance` is provided" ) optimizer = optimizer_instance return optimizer @@ -353,15 +297,15 @@ def __initialize_amp( self, optimizer, optim_level, amp_max_loss_scale=2.0 ** 24, amp_min_loss_scale=1.0, ): if optim_level not in AmpOptimizations: - raise ValueError(f"__initialize_amp() was called with unknown " "optim_level={optim_level}") + raise ValueError(f"__initialize_amp() was called with unknown optim_level={optim_level}") # in this case, nothing to do here if optim_level == Optimization.mxprO0: return optimizer - if len(self.modules) < 1: + if len(AppState().modules) < 1: raise ValueError("There were no modules to initialize") pt_modules = [] - for module in self.modules: + for module in AppState().modules: if isinstance(module, nn.Module): pt_modules.append(module) elif isinstance(module, TrainableNeuralModuleWrapper): @@ -377,8 +321,11 @@ def __initialize_amp( self.amp_initialized = True return optimizer + def nm_graph_forward_pass(self, callchain, registered_tensors): + self.__nm_graph_forward_pass(callchain, registered_tensors) + def __nm_graph_forward_pass( - self, call_chain, registered_tensors, mode=ModelMode.train, use_cache=False, + self, call_chain, registered_tensors, mode=OperationMode.training, use_cache=False, ): for ind in range(1, len(call_chain)): if use_cache: @@ -394,38 +341,25 @@ def __nm_graph_forward_pass( if in_cache: continue call_args = call_chain[ind][1] - # module = call_chain[ind][0] m_id = call_chain[ind][0].unique_instance_id - pmodule = self.module_reference_table[m_id][1] - - # if self._local_rank is not None: - # if isinstance(pmodule, DDP): - # if disable_allreduce: - # pmodule.disable_allreduce() - # else: - # pmodule.enable_allreduce() + pmodule = self.ddp_module_dict[m_id] if self.ddp_initialized else call_chain[ind][0] - if mode == ModelMode.train: + if mode == OperationMode.training: # if module.is_trainable(): if isinstance(pmodule, nn.Module): pmodule.train() - elif mode == ModelMode.eval: + elif mode == OperationMode.evaluation: # if module.is_trainable(): if isinstance(pmodule, nn.Module): pmodule.eval() else: - raise ValueError("Unknown ModelMode") + raise ValueError("Unknown OperationMode") # prepare call signature for `module` call_set = {} for tensor_name, nmtensor in 
call_args.items(): - # _add_uuid_2_name(nmtensor.name, nmtensor.producer._uuid) key = nmtensor.unique_name call_set[tensor_name] = registered_tensors[key] - # actual PyTorch module call with signature - if isinstance(self.module_reference_table[m_id][0], TrainableNeuralModuleWrapper,): - new_tensors = pmodule(**call_set) - else: - new_tensors = pmodule(force_pt=True, **call_set) + new_tensors = pmodule(force_pt=True, **call_set) if not isinstance(new_tensors, List): if not isinstance(new_tensors, tuple): @@ -436,10 +370,10 @@ def __nm_graph_forward_pass( if nm_tensor is None: continue t_name = nm_tensor.unique_name - if t_name not in registered_tensors: + if t_name not in registered_tensors or registered_tensors[t_name] is None: registered_tensors[t_name] = t_tensor else: - raise ValueError("A NMTensor was produced twice in " f"the same DAG. {t_name}") + raise ValueError(f"A NMTensor was produced twice in the same DAG. {t_name}") @staticmethod def pad_tensor(t: torch.Tensor, target_size: torch.Size): @@ -505,36 +439,43 @@ def _eval(self, tensors_2_evaluate, callback, step, verbose=False): assert dist.is_initialized() is_distributed = True world_size = torch.distributed.get_world_size() - # logging.info( - # "Doing distributed evaluation. Rank {0} of {1}".format( - # self.local_rank, world_size - # ) - # ) + if dl_nm.dataset is not None: - sampler = torch.utils.data.distributed.DistributedSampler( - dataset=dl_nm.dataset, shuffle=dl_nm.shuffle - ) - eval_dataloader = torch.utils.data.DataLoader( - dataset=dl_nm.dataset, - sampler=sampler, - num_workers=dl_nm.num_workers, - batch_size=dl_nm.batch_size, - shuffle=False, - ) + sampler = None + if not isinstance(dl_nm.dataset, torch.utils.data.IterableDataset): + sampler = torch.utils.data.distributed.DistributedSampler( + dataset=dl_nm.dataset, shuffle=dl_nm.shuffle + ) + dataloader_params = { + 'dataset': dl_nm.dataset, + 'sampler': sampler, + 'num_workers': dl_nm.num_workers, + 'batch_size': dl_nm.batch_size, + 'shuffle': False, + 'pin_memory': dl_nm.pin_memory, + } + if hasattr(dl_nm, 'collate_fn'): + dataloader_params['collate_fn'] = dl_nm.collate_fn + eval_dataloader = torch.utils.data.DataLoader(**dataloader_params) else: eval_dataloader = dl_nm.data_iterator + if hasattr(eval_dataloader, 'sampler'): eval_dataloader.sampler.set_epoch(0) else: # Not distributed if dl_nm.dataset is not None: # Todo: remove local_parameters - eval_dataloader = torch.utils.data.DataLoader( - dataset=dl_nm.dataset, - sampler=None, # not distributed sampler - num_workers=dl_nm.num_workers, - batch_size=dl_nm.batch_size, - shuffle=dl_nm.shuffle, - ) + dataloader_params = { + 'dataset': dl_nm.dataset, + 'sampler': None, # not distributed sampler + 'num_workers': dl_nm.num_workers, + 'batch_size': dl_nm.batch_size, + 'shuffle': dl_nm.shuffle, + 'pin_memory': dl_nm.pin_memory, + } + if hasattr(dl_nm, 'collate_fn'): + dataloader_params['collate_fn'] = dl_nm.collate_fn + eval_dataloader = torch.utils.data.DataLoader(**dataloader_params) else: eval_dataloader = dl_nm.data_iterator # after this eval_dataloader is ready to be used @@ -545,9 +486,15 @@ def _eval(self, tensors_2_evaluate, callback, step, verbose=False): dl_device = dl_nm._device # Evaluation mini-batch for loop - num_batches = len(eval_dataloader) + num_batches = None + if hasattr(eval_dataloader, "__len__"): + num_batches = len(eval_dataloader) for epoch_i, data in enumerate(eval_dataloader, 0): - if verbose and (num_batches < 10 or (epoch_i % int(num_batches / 10) == 0)): + if ( + verbose + and 
num_batches is not None + and (num_batches < 10 or (epoch_i % int(num_batches / 10) == 0)) + ): logging.info(f"Evaluating batch {epoch_i} out of {num_batches}") tensors = [] if isinstance(data, torch.Tensor): @@ -562,7 +509,7 @@ def _eval(self, tensors_2_evaluate, callback, step, verbose=False): t.unique_name: d for t, d in zip(call_chain[0][2].values(), tensors) if t is not None } self.__nm_graph_forward_pass( - call_chain=call_chain, registered_tensors=registered_e_tensors, mode=ModelMode.eval, + call_chain=call_chain, registered_tensors=registered_e_tensors, mode=OperationMode.evaluation, ) if not is_distributed or self.global_rank == 0: @@ -572,7 +519,7 @@ def _eval(self, tensors_2_evaluate, callback, step, verbose=False): for t2e in tensors_2_evaluate: key = t2e.unique_name if key not in registered_e_tensors.keys(): - logging.info("WARNING: Tensor {} was not found during " "eval".format(key)) + logging.info("WARNING: Tensor {} was not found during eval".format(key)) continue if is_distributed: # where we will all_gather results from all workers @@ -618,13 +565,16 @@ def _eval(self, tensors_2_evaluate, callback, step, verbose=False): # should happend only on one worker if callback.user_done_callback and (self.global_rank is None or self.global_rank == 0): vals_to_log = callback.user_done_callback(callback._global_var_dict) - # log results to Tensorboard - if vals_to_log is not None and callback.swriter is not None: - if callback.tb_writer_func is not None: - callback.tb_writer_func(callback.swriter, vals_to_log, step) - else: - for key, val in vals_to_log.items(): - callback.swriter.add_scalar(key, val, step) + # log results to Tensorboard or Weights & Biases + if vals_to_log is not None: + if hasattr(callback, 'swriter') and callback.swriter is not None: + if hasattr(callback, 'tb_writer_func') and callback.tb_writer_func is not None: + callback.tb_writer_func(callback.swriter, vals_to_log, step) + else: + for key, val in vals_to_log.items(): + callback.swriter.add_scalar(key, val, step) + if hasattr(callback, 'wandb_log'): + callback.wandb_log(vals_to_log) def _infer( self, tensors_to_return, verbose=False, cache=False, use_cache=False, offload_to_cpu=True, @@ -635,7 +585,7 @@ def _infer( # Checking that cache is used properly if cache and use_cache: raise ValueError( - "cache and use_cache were both set. However cache" " must first be created prior to using it." + "cache and use_cache were both set. However cache must first be created prior to using it." ) if cache: if self.cache is not None: @@ -662,22 +612,23 @@ def _infer( assert dist.is_initialized() is_distributed = True world_size = torch.distributed.get_world_size() - # logging.info( - # "Doing distributed evaluation. 
Rank {0} of {1}".format( - # self.local_rank, world_size - # ) - # ) if dl_nm.dataset is not None: - sampler = torch.utils.data.distributed.DistributedSampler( - dataset=dl_nm.dataset, shuffle=dl_nm.shuffle - ) - eval_dataloader = torch.utils.data.DataLoader( - dataset=dl_nm.dataset, - sampler=sampler, - num_workers=dl_nm.num_workers, - batch_size=dl_nm.batch_size, - shuffle=False, - ) + sampler = None + if not isinstance(dl_nm.dataset, torch.utils.data.IterableDataset): + sampler = torch.utils.data.distributed.DistributedSampler( + dataset=dl_nm.dataset, shuffle=dl_nm.shuffle + ) + dataloader_params = { + 'dataset': dl_nm.dataset, + 'sampler': sampler, + 'num_workers': dl_nm.num_workers, + 'batch_size': dl_nm.batch_size, + 'shuffle': False, + 'pin_memory': dl_nm.pin_memory, + } + if hasattr(dl_nm, 'collate_fn'): + dataloader_params['collate_fn'] = dl_nm.collate_fn + eval_dataloader = torch.utils.data.DataLoader(**dataloader_params) else: eval_dataloader = dl_nm.data_iterator eval_dataloader.sampler.set_epoch(0) @@ -686,13 +637,17 @@ def _infer( # When caching, the DAG must cache all outputs from dataloader if dl_nm.dataset is not None: # Todo: remove local_parameters - eval_dataloader = torch.utils.data.DataLoader( - dataset=dl_nm.dataset, - sampler=None, # not distributed sampler - num_workers=dl_nm.num_workers, - batch_size=dl_nm.batch_size, - shuffle=dl_nm.shuffle, - ) + dataloader_params = { + 'dataset': dl_nm.dataset, + 'sampler': None, # not distributed sampler + 'num_workers': dl_nm.num_workers, + 'batch_size': dl_nm.batch_size, + 'shuffle': dl_nm.shuffle, + 'pin_memory': dl_nm.pin_memory, + } + if hasattr(dl_nm, 'collate_fn'): + dataloader_params['collate_fn'] = dl_nm.collate_fn + eval_dataloader = torch.utils.data.DataLoader(**dataloader_params) else: eval_dataloader = dl_nm.data_iterator # after this eval_dataloader is ready to be used @@ -714,7 +669,6 @@ def _infer( loop_iterator = eval_dataloader for epoch_i, data in enumerate(loop_iterator, 0): - logging.debug(torch.cuda.memory_allocated()) if verbose and (num_batches < 10 or (epoch_i % int(num_batches / 10) == 0)): logging.info(f"Evaluating batch {epoch_i} out of {num_batches}") tensors = [] @@ -742,16 +696,10 @@ def _infer( self.__nm_graph_forward_pass( call_chain=call_chain, registered_tensors=registered_e_tensors, - mode=ModelMode.eval, + mode=OperationMode.evaluation, use_cache=use_cache, ) - # if offload_to_cpu: - # # Take all cuda tensors and save them to value_dict as - # # cpu tensors to save GPU memory - # for name, tensor in registered_e_tensors.items(): - # if isinstance(tensor, torch.Tensor): - # registered_e_tensors[name] = tensor.cpu() if cache: self.append_to_cache(registered_e_tensors, offload_to_cpu) @@ -760,7 +708,7 @@ def _infer( for t2e in tensors_to_return: key = t2e.unique_name if key not in registered_e_tensors.keys(): - logging.info("WARNING: Tensor {} was not found during " "eval".format(key)) + logging.info("WARNING: Tensor {} was not found during eval".format(key)) continue if is_distributed: # where we will all_gather results from all workers @@ -837,8 +785,8 @@ def save_state_to(self, path: str): """ state = { "step": self.step, - "epoch_num": self.epoch_num, - "optimizer_state": [opt.state_dict() for opt in self.optimizers], + "epoch": self.epoch, + "optimizer_state": [opt.state_dict() for opt in self._optimizers], } torch.save(state, path) @@ -853,13 +801,13 @@ def restore_state_from(self, path: str): """ if os.path.isfile(path): # map_location could be cuda: but cpu seems to be more - # 
general since we are also saving step and epoch_num + # general since we are also saving step and epoch # load_state_dict should move the variables to the relevant device checkpoint = torch.load(path, map_location="cpu") self.step = checkpoint["step"] - self.epoch_num = checkpoint["epoch_num"] + self.epoch = checkpoint["epoch"] if checkpoint["optimizer_state"]: - for opt, opt_chkpt in zip(self.optimizers, checkpoint["optimizer_state"]): + for opt, opt_chkpt in zip(self._optimizers, checkpoint["optimizer_state"]): opt.load_state_dict(opt_chkpt) else: raise FileNotFoundError("Could not find checkpoint file: {0}".format(path)) @@ -885,35 +833,8 @@ def _check_tuples(list_of_tuples): return False return True - def _get_all_modules(self, training_loop, callbacks, logging_callchain=None): - """Gets all neural modules that will be used by train() and eval() via - EvaluatorCallbacks. Saves all modules to self.modules - """ - # If there is a SimpleLossLoggerCallback, create an logger_callchain - # with all callchains from training_loop and - # SimpleLossLoggerCallback.tensors - if logging_callchain: - for module in logging_callchain: - self.modules.add(module[0]) - - # Else grab all callchains from training_loop - else: - for step in training_loop: - for module in step[2]: - self.modules.add(module[0]) - - # Lastly, grab all eval modules - if callbacks is not None: - for callback in callbacks: - if isinstance(callback, EvaluatorCallback): - (callchain, _,) = self.__get_top_sorted_modules_and_dataloader(hook=callback.eval_tensors) - for module in callchain: - self.modules.add(module[0]) - @staticmethod - def __module_export( - module, output, d_format: DeploymentFormat, input_example=None, output_example=None, - ): + def __module_export(module, output, d_format: DeploymentFormat, input_example=None, output_example=None): # Check if output already exists destination = Path(output) if destination.exists(): @@ -929,25 +850,17 @@ def __extract_dynamic_axes(port_name: str, ntype: NeuralType, dynamic_axes: defa if axis.kind == AxisKind.Batch or axis.kind == AxisKind.Time: dynamic_axes[port_name].append(ind) - # This is a hack for Jasper to Jarvis export -- need re-design for this - inputs_to_drop = set() - outputs_to_drop = set() - if type(module).__name__ == "JasperEncoder": - logging.info( - "Module is JasperEncoder. We are removing input and output length ports since they are not needed for " - "deployment" - ) - inputs_to_drop.add("length") - outputs_to_drop.add("encoded_lengths") - + # extract dynamic axes and remove unnecessary inputs/outputs # for input_ports for port_name, ntype in module.input_ports.items(): - if port_name in inputs_to_drop: + if port_name in module._disabled_deployment_input_ports: + input_names.remove(port_name) continue __extract_dynamic_axes(port_name, ntype, dynamic_axes) # for output_ports for port_name, ntype in module.output_ports.items(): - if port_name in outputs_to_drop: + if port_name in module._disabled_deployment_output_ports: + output_names.remove(port_name) continue __extract_dynamic_axes(port_name, ntype, dynamic_axes) @@ -957,12 +870,6 @@ def __extract_dynamic_axes(port_name: str, ntype: NeuralType, dynamic_axes: defa # Make a deep copy of init parameters. init_params_copy = copy.deepcopy(module._init_params) - # Remove NeMo-related things from the module - # We need to change __call__ method. Note that this will change the - # whole class, not just this object! 
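`save_state_to`/`restore_state_from` above now store the trainer state under the key `epoch` (instead of `epoch_num`) and take the optimizer list from `self._optimizers`; the `StateWrapper` further below additionally falls back to the legacy `epoch_num` key when restoring older checkpoints. A standalone sketch of that checkpoint layout, with hypothetical helper names:

```python
import os

import torch


def save_trainer_state(path, step, epoch, optimizers):
    state = {
        "step": step,
        "epoch": epoch,
        "optimizer_state": [opt.state_dict() for opt in optimizers],
    }
    torch.save(state, path)


def restore_trainer_state(path, optimizers):
    if not os.path.isfile(path):
        raise FileNotFoundError("Could not find checkpoint file: {0}".format(path))
    # cpu is the safe map_location; load_state_dict moves tensors where needed later.
    checkpoint = torch.load(path, map_location="cpu")
    # Prefer the new "epoch" key, fall back to the legacy "epoch_num".
    epoch = checkpoint.get("epoch", checkpoint.get("epoch_num"))
    if epoch is None:
        raise ValueError("Epoch was not found in the trainer checkpoint")
    for opt, opt_state in zip(optimizers, checkpoint.get("optimizer_state") or []):
        opt.load_state_dict(opt_state)
    return checkpoint["step"], epoch
```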
Which is why we need to repair it - # in the finally block - type(module).__call__ = torch.nn.Module.__call__ - # Reset standard instance field - making the file (probably) lighter. module._init_params = None module._placement = None @@ -971,6 +878,13 @@ def __extract_dynamic_axes(port_name: str, ntype: NeuralType, dynamic_axes: defa module.eval() try: + # Remove NeMo-related things from the module + # We need to change __call__ method. Note that this will change the + # whole class, not just this object! Which is why we need to repair it + # in the finally block + __orig_call__ = type(module).__call__ + type(module).__call__ = torch.nn.Module.__call__ + if d_format == DeploymentFormat.TORCHSCRIPT: if input_example is None: # Route 1 - via torch.jit.script @@ -980,7 +894,7 @@ def __extract_dynamic_axes(port_name: str, ntype: NeuralType, dynamic_axes: defa # Route 2 - via tracing traced_m = torch.jit.trace(module, input_example) traced_m.save(output) - elif d_format == DeploymentFormat.ONNX: + elif d_format == DeploymentFormat.ONNX or d_format == DeploymentFormat.TRTONNX: if input_example is None: raise ValueError(f'Example input is None, but ONNX tracing was' f' attempted') if output_example is None: @@ -997,11 +911,11 @@ def __extract_dynamic_axes(port_name: str, ntype: NeuralType, dynamic_axes: defa output, input_names=input_names, output_names=output_names, - verbose=True, + verbose=False, export_params=True, do_constant_folding=True, dynamic_axes=dynamic_axes, - opset_version=10, + opset_version=11, example_outputs=output_example, ) # fn = output + ".readable" @@ -1022,20 +936,10 @@ def __extract_dynamic_axes(port_name: str, ntype: NeuralType, dynamic_axes: defa except Exception as e: # nopep8 logging.error(f'module export failed for {module} ' f'with exception {e}') finally: - - def __old_call__(self, force_pt=False, *input, **kwargs): - pt_call = len(input) > 0 or force_pt - if pt_call: - return nn.Module.__call__(self, *input, **kwargs) - else: - return NeuralModule.__call__(self, **kwargs) - - type(module).__call__ = __old_call__ + type(module).__call__ = __orig_call__ @staticmethod - def deployment_export( - module, output: str, d_format: DeploymentFormat, input_example=None, output_example=None, - ): + def deployment_export(module, output: str, d_format: DeploymentFormat, input_example=None, output_example=None): """Exports Neural Module instance for deployment. Args: @@ -1047,6 +951,7 @@ def deployment_export( amp_max_loss_scale (float): Max value for amp loss scaling. Defaults to 2.0**24. 
""" + with torch.no_grad(): PtActions.__module_export( module=module, @@ -1058,7 +963,8 @@ def deployment_export( def train( self, - tensors_to_optimize, + tensors_to_optimize=None, + training_graph=None, optimizer=None, optimization_params=None, callbacks: Optional[List[ActionCallback]] = None, @@ -1070,6 +976,200 @@ def train( gradient_predivide=False, amp_max_loss_scale=2.0 ** 24, ): + def _perform_on_step_start(callbacks, state): + # TODO: Most of these checks can be relaxed since we enforce callbacks + # to be a list of ActionCallback objects + if callbacks is not None and isinstance(callbacks, List) and len(callbacks) > 0: + for callback in callbacks: + if isinstance(callback, ActionCallback): + callback.on_iteration_start() + elif isinstance(callback, NeMoCallback): + callback.on_step_start(state) + else: + raise ValueError( + "Callback was not a child of ActionCallback nor NeMoCallback and was not understood" + ) + + def _perform_on_step_end(callbacks, state): + if callbacks is not None and isinstance(callbacks, List) and len(callbacks) > 0: + for callback in callbacks: + if isinstance(callback, ActionCallback): + callback.on_iteration_end() + elif isinstance(callback, NeMoCallback): + callback.on_step_end(state) + else: + raise ValueError( + "Callback was not a child of ActionCallback nor NeMoCallback and was not understood" + ) + + def _perform_on_action_start(callbacks, state): + if callbacks is not None and isinstance(callbacks, List) and len(callbacks) > 0: + for callback in callbacks: + if isinstance(callback, ActionCallback): + callback.on_action_start() + elif isinstance(callback, NeMoCallback): + callback.on_train_start(state) + else: + raise ValueError( + "Callback was not a child of ActionCallback nor NeMoCallback and was not understood" + ) + + def _perform_on_action_end(callbacks, state): + if callbacks is not None and isinstance(callbacks, List) and len(callbacks) > 0: + for callback in callbacks: + if isinstance(callback, ActionCallback): + callback.on_action_end() + elif isinstance(callback, NeMoCallback): + callback.on_train_end(state) + else: + raise ValueError( + "Callback was not a child of ActionCallback nor NeMoCallback and was not understood" + ) + + def _perform_on_epoch_start(callbacks, state): + if callbacks is not None and isinstance(callbacks, List) and len(callbacks) > 0: + for callback in callbacks: + if isinstance(callback, ActionCallback): + callback.on_epoch_start() + elif isinstance(callback, NeMoCallback): + callback.on_epoch_start(state) + else: + raise ValueError( + "Callback was not a child of ActionCallback nor NeMoCallback and was not understood" + ) + + def _perform_on_epoch_end(callbacks, state): + if callbacks is not None and isinstance(callbacks, List) and len(callbacks) > 0: + for callback in callbacks: + if isinstance(callback, ActionCallback): + callback.on_epoch_end() + elif isinstance(callback, NeMoCallback): + callback.on_epoch_end(state) + else: + raise ValueError( + "Callback was not a child of ActionCallback nor NeMoCallback and was not understood" + ) + + def _perform_on_batch_start(callbacks, state): + if callbacks is not None and isinstance(callbacks, List) and len(callbacks) > 0: + for callback in callbacks: + if isinstance(callback, ActionCallback): + continue + elif isinstance(callback, NeMoCallback): + callback.on_batch_start(state) + else: + raise ValueError( + "Callback was not a child of ActionCallback nor NeMoCallback and was not understood" + ) + + def _perform_on_batch_end(callbacks, state): + if callbacks is not 
None and isinstance(callbacks, List) and len(callbacks) > 0: + for callback in callbacks: + if isinstance(callback, ActionCallback): + continue + elif isinstance(callback, NeMoCallback): + callback.on_batch_end(state) + else: + raise ValueError( + "Callback was not a child of ActionCallback nor NeMoCallback and was not understood" + ) + + def _init_callbacks(callbacks, action): + if callbacks is not None and isinstance(callbacks, List) and len(callbacks) > 0: + for callback in callbacks: + if isinstance(callback, ActionCallback): + callback.action = action + + def _update_callbacks(callbacks=None, registered_tensors=None, final_loss=None): + # if self.local_rank is None or self.local_rank == 0: + if callbacks is not None and isinstance(callbacks, List) and len(callbacks) > 0: + for callback in callbacks: + if isinstance(callback, ActionCallback): + callback._registered_tensors = registered_tensors + else: # For now, we can use the old callback function. In the future we should improve this + registered_tensors["loss"] = final_loss + + def get_state(action: 'PtAction'): + """Helper function used to create a state for callbacks + """ + + class StateWrapper(dict): + def __init__(self, action): + """A class that wraps a dictionary but adds the functions: restore_state_from and save_state_to + which are helper functions for CheckpointCallback to use. + The StateWrapper is a dictionary that contains the following mapping: + "step" (int): the current training step + "epoch" (int): the current epoch step + "local_rank" (int): the local rank that the process is running on + "global_rank" (int): the global rank that the process is running on + "optimizers" (list): a list of optimizers defined during the training process + "tensors" (TrainingState): A TrainingState object that can be used to access tensor values + """ + self.action = action + super().__init__( + { + "step": action.step, + "tensors": action._training_state, + "epoch": action.epoch, + "local_rank": action.local_rank, + "global_rank": action.global_rank, + "optimizers": action.optimizers, + } + ) + + def restore_state_from(self, path): + if os.path.isfile(path): + # map_location could be cuda: but cpu seems to be more + # general since we are also saving step and epoch + # load_state_dict should move the variables to the relevant device + checkpoint = torch.load(path, map_location="cpu") + action.step = checkpoint["step"] + self["step"] = action.step + epoch = checkpoint.get("epoch", None) + if epoch is None: + epoch = checkpoint.get("epoch_num", None) + if epoch is None: + raise ValueError("Epoch was not found in the trainer checkpoint") + action.epoch = epoch + self["epoch"] = action.epoch + if checkpoint["optimizer_state"]: + for opt, opt_chkpt in zip(self["optimizers"], checkpoint["optimizer_state"]): + opt.load_state_dict(opt_chkpt) + else: + raise FileNotFoundError("Could not find checkpoint file: {0}".format(path)) + + def save_state_to(self, path): + state = { + "step": self["step"], + "epoch": self["epoch"], + "optimizer_state": [opt.state_dict() for opt in self["optimizers"]], + } + torch.save(state, path) + + return StateWrapper(action) + + if self._train_called: + logging.warning( + "You called train twice. Please note that we do not support calling training twice in one script if " + "amp or ddp is used. 
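Each `_perform_on_*` helper above dispatches on the callback type: legacy `ActionCallback`s keep their zero-argument hooks, while `NeMoCallback`s receive the `StateWrapper` dictionary (keys `step`, `epoch`, `tensors`, `local_rank`, `global_rank`, `optimizers`). One such helper in isolation; the import path of the two base classes is an assumption, since only their usage appears in this hunk:

```python
from nemo.core.callbacks import ActionCallback, NeMoCallback  # assumed import path


def perform_on_epoch_start(callbacks, state):
    for callback in callbacks or []:
        if isinstance(callback, ActionCallback):
            callback.on_epoch_start()        # old API: no arguments
        elif isinstance(callback, NeMoCallback):
            callback.on_epoch_start(state)   # new API: receives the state dict
        else:
            raise ValueError(
                "Callback was not a child of ActionCallback nor NeMoCallback and was not understood"
            )
```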
If you wish to call train twice, you need to run " + "`nemo.utils.app_state.AppState().modules.clear(); neural_factory.reset_trainer()` and then " + "reinstantiate all Neural Modules prior to calling train()" + ) + self._train_called = True + + self._training_state = TrainingState(self) + # Analyse the arguments passed to train. + if tensors_to_optimize is not None and training_graph is not None: + raise ValueError("Cannot pass both `tensors_to_optimize` and `training_graph` to the train() function") + # if tensors_to_optimize is None and training_graph is None: + # raise ValueError( + # "One of the `tensors_to_optimize` or `training_graph` values must be passed to the train() function" + # ) + # Finally, unify. + if training_graph is not None: + # To keep the "compatibility with old NeMo": get output tensors. + tensors_to_optimize = training_graph.outputs.tensor_list + if gradient_predivide: logging.error( "gradient_predivide is currently disabled, and is under consideration for removal in future versions. " @@ -1090,9 +1190,9 @@ def train( if tensors_to_optimize is None: # This is Evaluation Mode - self._init_callbacks(callbacks) + _init_callbacks(callbacks, self) # Do action start callbacks - self._perform_on_action_end(callbacks=callbacks) + _perform_on_action_end(callbacks, get_state(self)) return # Check if tensors_to_optimize is just a list of NmTensors elif tensors_to_optimize is not None and ( @@ -1112,7 +1212,7 @@ def train( optimizer_class = None if isinstance(optimizer, str): optimizer_class = optimizer - elif isinstance(optimizer, torch.optim.optimizer): + elif isinstance(optimizer, torch.optim.Optimizer): optimizer_instance = optimizer else: raise ValueError("optimizer was not understood") @@ -1125,14 +1225,15 @@ def train( training_loop = [(optimizer, tensors_to_optimize, opt_call_chain)] - self.optimizers.append(optimizer) - assert len(self.optimizers) == 1, ( - "There was more than one optimizer, was create_optimizer() " "called before train()?" + self._optimizers.append(optimizer) + assert len(self._optimizers) == 1, ( + "There was more than one optimizer, was create_optimizer() called before train()? Are you calling " + "train() twice in one script, If so you need to call NeuralModuleFactory.reset_trainer() first." 
) elif PtActions._check_tuples(tensors_to_optimize): if batches_per_step != 1: - raise ValueError("Gradient accumlation with multiple " "optimizers is not supported") + raise ValueError("Gradient accumlation with multiple optimizers is not supported") datasets = [] training_loop = [] for step in tensors_to_optimize: @@ -1151,11 +1252,13 @@ def train( # callbacks setup if callbacks is not None: for callback in callbacks: - if not isinstance(callback, ActionCallback): - raise ValueError("A callback was received that was not a " "child of ActionCallback") + if not isinstance(callback, ActionCallback) and not isinstance(callback, NeMoCallback): + raise ValueError( + "A callback was received that was not a child of ActionCallback nor a child of NeMoCallback" + ) elif isinstance(callback, SimpleLossLoggerCallback): if logging_callchain: - raise ValueError("We only support one logger callback " "but more than one were found") + raise ValueError("We only support one logger callback but more than one were found") logger_step_freq = callback._step_freq logging_tensors = callback.tensors all_tensors = logging_tensors @@ -1163,16 +1266,14 @@ def train( all_tensors = all_tensors + step[1] (logging_callchain, _,) = self.__get_top_sorted_modules_and_dataloader(hook=all_tensors) - self._get_all_modules(training_loop, callbacks, logging_callchain) - # Intialize Amp if needed if self._optim_level in AmpOptimizations: # Store mapping of self.optimizers to optimizer in callchain training_loop_opts = [] for opt in training_loop: - training_loop_opts.append(self.optimizers.index(opt[0])) - self.optimizers = self.__initialize_amp( - optimizer=self.optimizers, + training_loop_opts.append(self._optimizers.index(opt[0])) + self._optimizers = self.__initialize_amp( + optimizer=self._optimizers, optim_level=self._optim_level, amp_max_loss_scale=amp_max_loss_scale, amp_min_loss_scale=optimization_params.get('amp_min_loss_scale', 1.0), @@ -1180,29 +1281,32 @@ def train( # Use stored mapping to map amp_init opts to training loop for i, step in enumerate(training_loop): training_loop[i] = ( - self.optimizers[training_loop_opts[i]], + self._optimizers[training_loop_opts[i]], step[1], step[2], ) dataNM = training_loop[0][2][0][0] - if dataNM.placement == DeviceType.AllGpu: - # if len(training_loop) > 1: - # raise NotImplementedError( - # "Distributed training does nor work with multiple " - # "optimizers") + placement_gpu = dataNM.placement == DeviceType.AllGpu + if placement_gpu: logging.info("Doing distributed training") if t_dataset is not None: - train_sampler = torch.utils.data.distributed.DistributedSampler( - dataset=t_dataset, shuffle=dataNM.shuffle - ) - train_dataloader = torch.utils.data.DataLoader( - dataset=t_dataset, - sampler=train_sampler, - num_workers=dataNM.num_workers, - batch_size=dataNM.batch_size, - shuffle=False, - ) + train_sampler = None + if not isinstance(t_dataset, torch.utils.data.IterableDataset): + train_sampler = torch.utils.data.distributed.DistributedSampler( + dataset=t_dataset, shuffle=dataNM.shuffle + ) + dataloader_params = { + 'dataset': t_dataset, + 'sampler': train_sampler, + 'num_workers': dataNM.num_workers, + 'batch_size': dataNM.batch_size, + 'shuffle': False, + 'pin_memory': dataNM.pin_memory, + } + if hasattr(dataNM, 'collate_fn'): + dataloader_params['collate_fn'] = dataNM.collate_fn + train_dataloader = torch.utils.data.DataLoader(**dataloader_params) else: train_dataloader = dataNM.data_iterator if hasattr(train_dataloader, 'sampler'): @@ -1210,84 +1314,76 @@ def train( 
else: train_sampler = None - for train_iter in training_loop: - call_chain = train_iter[2] - for i in range(1, len(call_chain) - 1): - key = call_chain[i][0].unique_instance_id - pmodule = self.module_reference_table[key][1] - if not isinstance(pmodule, DDP) and isinstance(pmodule, torch.nn.Module): - # gpf = 1 - # if gradient_predivide: - # gpf = dist.get_world_size() - # pmodule = DDP(pmodule, gradient_predivide_factor=gpf) # Old Apex Method - - # Per pytorch docs, convert sync bn prior to DDP - if synced_batchnorm: - world_size = dist.get_world_size() - sync_batchnorm_group = None - if synced_batchnorm_groupsize > 0: - if world_size % synced_batchnorm_groupsize != 0: - raise ValueError( - f"Synchronized batch norm group size ({synced_batchnorm_groupsize}) must be 0" - f" or divide total number of GPUs ({world_size})." - ) - sync_batchnorm_group = torch.distributed.new_group(synced_batchnorm_groupsize) - pmodule = nn.SyncBatchNorm.convert_sync_batchnorm( - pmodule, process_group=sync_batchnorm_group + self.ddp_initialized = True + module_list = [mod.name for mod in AppState().modules] + module_list = sorted(module_list) + for module_name in module_list: + module = AppState().modules[module_name] + key = module.unique_instance_id + num_trainable_weights = module.num_weights + self.ddp_module_dict[key] = module + if not isinstance(module, DDP) and isinstance(module, torch.nn.Module) and num_trainable_weights > 0: + # Per pytorch docs, convert sync bn prior to DDP + if synced_batchnorm: + world_size = dist.get_world_size() + sync_batchnorm_group = None + if synced_batchnorm_groupsize > 0: + if world_size % synced_batchnorm_groupsize != 0: + raise ValueError( + f"Synchronized batch norm group size ({synced_batchnorm_groupsize}) must be 0" + f" or divide total number of GPUs ({world_size})." + ) + # Find ranks of other nodes in the same batchnorm group + rank = torch.distributed.get_rank() + group = rank // synced_batchnorm_groupsize + group_rank_ids = range( + group * synced_batchnorm_groupsize, (group + 1) * synced_batchnorm_groupsize ) + sync_batchnorm_group = torch.distributed.new_group(group_rank_ids) - # By default, disable broadcast_buffers. This disables batch norm synchronization on forward - # pass - pmodule = DDP( - pmodule, device_ids=[self.local_rank], broadcast_buffers=False, find_unused_parameters=True - ) + module = nn.SyncBatchNorm.convert_sync_batchnorm(module, process_group=sync_batchnorm_group) - # # Convert batchnorm modules to synced if applicable - # if synced_batchnorm and isinstance(pmodule, torch.nn.Module): - # world_size = dist.get_world_size() - # if synced_batchnorm_groupsize > 0 and world_size % synced_batchnorm_groupsize != 0: - # raise ValueError( - # f"Synchronized batch norm group size" - # f" ({synced_batchnorm_groupsize}) must be 0" - # f" or divide total number of GPUs" - # f" ({world_size})." - # ) - # process_group = create_syncbn_process_group(synced_batchnorm_groupsize) - # pmodule = convert_syncbn(pmodule, process_group=process_group) - - self.module_reference_table[key] = ( - self.module_reference_table[key][0], - pmodule, + # By default, disable broadcast_buffers. 
This disables batch norm synchronization on forward + # pass + module = DDP( + module, device_ids=[self.local_rank], broadcast_buffers=False, find_unused_parameters=True ) + self.ddp_module_dict[key] = module + # single GPU/CPU training else: if t_dataset is not None: train_sampler = None - train_dataloader = torch.utils.data.DataLoader( - dataset=t_dataset, - sampler=None, - num_workers=dataNM.num_workers, - batch_size=dataNM.batch_size, - shuffle=dataNM.shuffle, - ) + dataloader_params = { + 'dataset': t_dataset, + 'sampler': None, + 'num_workers': dataNM.num_workers, + 'batch_size': dataNM.batch_size, + 'shuffle': dataNM.shuffle, + 'pin_memory': dataNM.pin_memory, + } + if hasattr(dataNM, 'collate_fn'): + dataloader_params['collate_fn'] = dataNM.collate_fn + + train_dataloader = torch.utils.data.DataLoader(**dataloader_params) else: train_dataloader = dataNM.data_iterator train_sampler = None - self._init_callbacks(callbacks) + _init_callbacks(callbacks, self) # Do action start callbacks - self._perform_on_action_start(callbacks=callbacks) + _perform_on_action_start(callbacks, get_state(self)) # MAIN TRAINING LOOP # iteration over epochs - while num_epochs is None or self.epoch_num < num_epochs: + while num_epochs is None or self.epoch < num_epochs: if train_sampler is not None: - train_sampler.set_epoch(self.epoch_num) + train_sampler.set_epoch(self.epoch) if max_steps is not None and self.step >= max_steps: break # Register epochs start with callbacks - self._perform_on_epoch_start(callbacks=callbacks) + _perform_on_epoch_start(callbacks, get_state(self)) # iteration over batches in epoch batch_counter = 0 @@ -1300,16 +1396,22 @@ def train( curr_optimizer = training_loop[self.step % len(training_loop)][0] curr_optimizer.zero_grad() # Register iteration start with callbacks - self._perform_on_iteration_start(callbacks=callbacks) + _perform_on_step_start(callbacks, get_state(self)) + + # Perform batch start callbacks + _perform_on_batch_start(callbacks, get_state(self)) # set learning rate policy if lr_policy is not None: - adjusted_lr = lr_policy(optimization_params["lr"], self.step, self.epoch_num) + adjusted_lr = lr_policy(optimization_params["lr"], self.step, self.epoch) for param_group in curr_optimizer.param_groups: param_group["lr"] = adjusted_lr - if self.tb_writer is not None: - value = curr_optimizer.param_groups[0]['lr'] - self.tb_writer.add_scalar('param/lr', value, self.step) + + # TODO: Remove below loop when ActionCallback is removed + if callbacks is not None: + for callback in callbacks: + if isinstance(callback, ActionCallback): + callback.learning_rate = curr_optimizer.param_groups[0]['lr'] # registered_tensors will contain created tensors # named by output port and uuid of module which created them @@ -1327,39 +1429,35 @@ def train( else: tensors.append(d) - registered_tensors = { - t.unique_name: d for t, d in zip(curr_call_chain[0][2].values(), tensors) if t is not None - } + for t, d in zip(curr_call_chain[0][2].values(), tensors): + if t is not None: + self._training_state.set_tensor(t, d) disable_allreduce = batch_counter < (batches_per_step - 1) self.__nm_graph_forward_pass( - call_chain=curr_call_chain, registered_tensors=registered_tensors, + call_chain=curr_call_chain, registered_tensors=self._training_state.tensor_dict, ) curr_tensors_to_optimize = training_loop[self.step % len(training_loop)][1] final_loss = 0 - nan = False for tensor in curr_tensors_to_optimize: - if ( - torch.isnan(registered_tensors[tensor.unique_name]).any() - or 
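For distributed runs, every trainable module registered in `AppState()` is now converted to `SyncBatchNorm` (optionally within a smaller process group) and then wrapped in `DistributedDataParallel` with `broadcast_buffers=False`, so batch-norm buffers are not synchronized on every forward pass. The wrapping logic, reduced to a standalone sketch with a hypothetical helper name:

```python
import torch
import torch.distributed as dist
from torch.nn.parallel import DistributedDataParallel as DDP


def wrap_module_for_ddp(module, local_rank, synced_batchnorm=False, group_size=0):
    if synced_batchnorm:
        process_group = None
        if group_size > 0:
            world_size = dist.get_world_size()
            if world_size % group_size != 0:
                raise ValueError(
                    f"Synchronized batch norm group size ({group_size}) must be 0"
                    f" or divide total number of GPUs ({world_size})."
                )
            # Build the process group containing this rank and its neighbours.
            rank = dist.get_rank()
            group = rank // group_size
            group_rank_ids = range(group * group_size, (group + 1) * group_size)
            process_group = dist.new_group(group_rank_ids)
        module = torch.nn.SyncBatchNorm.convert_sync_batchnorm(module, process_group=process_group)
    # broadcast_buffers=False disables batch-norm buffer synchronization on forward.
    return DDP(module, device_ids=[local_rank], broadcast_buffers=False, find_unused_parameters=True)
```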
torch.isinf(registered_tensors[tensor.unique_name]).any() - ): - if stop_on_nan_loss: - raise ValueError('Loss is NaN or inf - exiting') - logging.warning('Loss is NaN or inf') - curr_optimizer.zero_grad() - nan = True - break - final_loss += registered_tensors[tensor.unique_name] - if nan: - continue + final_loss += self._training_state.tensor_dict[tensor.unique_name] + + # Check for NaN/inf loss (across workers if applicable) + loss_nan_inf_checker = final_loss.clone() + if placement_gpu: + dist.all_reduce(loss_nan_inf_checker, torch.distributed.ReduceOp.MAX) + if torch.isnan(loss_nan_inf_checker).any() or torch.isinf(loss_nan_inf_checker).any(): + if stop_on_nan_loss: + raise ValueError('Loss is NaN or inf - exiting') + if self._optim_level in AmpOptimizations and self._optim_level != Optimization.mxprO0: + logging.warning('Loss is NaN or inf.') + else: + # Skip this step across workers if loss is NaN/inf and using fp32 + logging.warning('Loss is NaN or inf. Skipping update.') + continue + if self._optim_level in AmpOptimizations and self._optim_level != Optimization.mxprO0: with amp.scale_loss(final_loss, curr_optimizer, delay_unscale=disable_allreduce) as scaled_loss: - if torch.isnan(scaled_loss).any() or torch.isinf(scaled_loss).any(): - if stop_on_nan_loss: - raise ValueError('Loss is NaN or inf -' ' exiting') - logging.warning('WARNING: Loss is NaN or inf') - curr_optimizer.zero_grad() - continue if disable_allreduce: with ExitStack() as stack: for mod in self.get_DDP_modules(curr_call_chain): @@ -1380,12 +1478,15 @@ def train( final_loss.backward(bps_scale.to(final_loss.get_device())) # single device (CPU or GPU) else: - # Fix (workaround?) enabling to backpropagate gradiens on CPUs. + # Fix (workaround?) enabling to backpropagate gradients on CPUs. 
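The NaN/inf handling above is now synchronized: the loss is cloned, max-reduced across workers, and every rank takes the same decision (raise, let AMP deal with it, or skip the update). The check itself, as a small sketch:

```python
import torch
import torch.distributed as dist


def loss_is_nan_or_inf(final_loss, distributed=False):
    checker = final_loss.detach().clone()
    if distributed:
        # All-reduce so every worker sees the same value and takes the same branch.
        dist.all_reduce(checker, op=dist.ReduceOp.MAX)
    return bool(torch.isnan(checker).any() or torch.isinf(checker).any())
```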
if final_loss.get_device() < 0: final_loss.backward(bps_scale) else: final_loss.backward(bps_scale.to(final_loss.get_device())) + # Perform batch end callbacks + _perform_on_batch_end(callbacks, get_state(self)) + batch_counter += 1 if batch_counter == batches_per_step: @@ -1395,16 +1496,17 @@ def train( curr_optimizer.step() batch_counter = 0 # Register iteration end with callbacks - self._update_callbacks( - callbacks=callbacks, registered_tensors=registered_tensors, + _update_callbacks( + callbacks, registered_tensors=self._training_state.tensor_dict, final_loss=final_loss ) - self._perform_on_iteration_end(callbacks=callbacks) + _perform_on_step_end(callbacks, get_state(self)) self.step += 1 + self._training_state.clear_dict() # End of epoch for loop # Register epochs end with callbacks - self._perform_on_epoch_end(callbacks=callbacks) - self.epoch_num += 1 - self._perform_on_action_end(callbacks=callbacks) + _perform_on_epoch_end(callbacks, get_state(self)) + self.epoch += 1 + _perform_on_action_end(callbacks, get_state(self)) def infer( self, @@ -1435,9 +1537,9 @@ def infer( modules_to_restore_name = [] for mod in modules_to_restore: if not isinstance(mod, NeuralModule): - raise ValueError("Found something that was not a Neural " "Module inside modules_to_restore") + raise ValueError("Found something that was not a Neural Module inside modules_to_restore") elif mod.num_weights == 0: - raise ValueError("Found a Neural Module with 0 weights " "inside modules_to_restore") + raise ValueError("Found a Neural Module with 0 weights inside modules_to_restore") modules_to_restore_name.append(str(mod)) module_checkpoints = get_checkpoint_from_dir(modules_to_restore_name, checkpoint_dir, ckpt_pattern) @@ -1477,7 +1579,7 @@ def get_DDP_modules(self, call_chain): modules = [] for ind in range(1, len(call_chain)): m_id = call_chain[ind][0].unique_instance_id - module = self.module_reference_table[m_id][1] + module = self.ddp_module_dict[m_id] if isinstance(module, DDP): modules.append(module) diff --git a/nemo/backends/pytorch/common/__init__.py b/nemo/backends/pytorch/common/__init__.py index adf89ab704d3..c80017b33e10 100644 --- a/nemo/backends/pytorch/common/__init__.py +++ b/nemo/backends/pytorch/common/__init__.py @@ -1,4 +1,5 @@ from nemo.backends.pytorch.common.losses import * +from nemo.backends.pytorch.common.multi_data import * from nemo.backends.pytorch.common.other import * from nemo.backends.pytorch.common.parts import * from nemo.backends.pytorch.common.rnn import * diff --git a/nemo/backends/pytorch/common/losses.py b/nemo/backends/pytorch/common/losses.py index 9d14f763e22d..ad25a5dd6773 100644 --- a/nemo/backends/pytorch/common/losses.py +++ b/nemo/backends/pytorch/common/losses.py @@ -2,11 +2,10 @@ from torch import nn from nemo.backends.pytorch.nm import LossNM -from nemo.core.neural_types import LabelsType, LogitsType, LossType, NeuralType, RegressionValuesType +from nemo.core.neural_types import LabelsType, LogitsType, LossType, MaskType, NeuralType, RegressionValuesType +from nemo.utils.decorators import add_port_docs -__all__ = ['SequenceLoss', 'CrossEntropyLoss', 'MSELoss'] - -EPS = 1e-5 +__all__ = ['SequenceLoss', 'CrossEntropyLossNM', 'MSELoss', 'LossAggregatorNM', 'BCEWithLogitsLossNM'] class SequenceLoss(LossNM): @@ -28,27 +27,34 @@ class SequenceLoss(LossNM): ctc_blank_id (int): ID of blank symbols to pass to mask when calculating ctc loss. Defaults to None. + eps (float): small number to prevent division by zero in loss calculation + Defaults to 1e-5. 
""" @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. """ return {'log_probs': NeuralType(axes=('B', 'T', 'D')), 'targets': NeuralType(axes=('B', 'T'))} @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. - - loss: - NeuralType(None) - """ return {"loss": NeuralType(elements_type=LossType())} def __init__( - self, pad_id=0, smoothing_coef=0.0, sample_wise=False, aux_ctc=False, ctc_initial_coef=0.1, ctc_blank_id=None + self, + pad_id=0, + smoothing_coef=0.0, + sample_wise=False, + aux_ctc=False, + ctc_initial_coef=0.1, + ctc_blank_id=None, + eps=1e-5, ): assert (not aux_ctc) or (ctc_blank_id is not None), "Should be a blank id if using CTC loss" @@ -59,6 +65,7 @@ def __init__( self.sample_wise = sample_wise self.aux_ctc = aux_ctc self.ctc_coef = ctc_initial_coef + self.eps = eps if aux_ctc: self.ctc = nn.CTCLoss(blank=ctc_blank_id, reduction='none', zero_infinity=True) @@ -86,7 +93,7 @@ def _ce_loss(self, log_probs, targets, pad_mask): if self.sample_wise: loss /= target_log_probs.size(0) else: - loss /= pad_mask.sum() + EPS + loss /= pad_mask.sum() + self.eps return loss def _ctc_loss(self, log_probs, targets, pad_mask): @@ -96,22 +103,28 @@ def _ctc_loss(self, log_probs, targets, pad_mask): return loss -class CrossEntropyLoss(LossNM): +class CrossEntropyLossNM(LossNM): """ CrossEntropyLoss - + Args: + logits_ndim (int): number of dimensions (or rank) of the logits tensor + weight (list): list of rescaling weight given to each class + reduction (str): type of the reduction over the batch """ @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. """ return { - "logits": NeuralType(axes=('B', 'D'), elements_type=LogitsType()), - "labels": NeuralType(axes=tuple('B'), elements_type=LabelsType()), + "logits": NeuralType(['B'] + ['ANY'] * (self._logits_dim - 1), LogitsType()), + "labels": NeuralType(['B'] + ['ANY'] * (self._logits_dim - 2), LabelsType()), + "loss_mask": NeuralType(['B'] + ['ANY'] * (self._logits_dim - 2), MaskType(), optional=True), } @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. 
@@ -120,19 +133,41 @@ def output_ports(self): """ return {"loss": NeuralType(elements_type=LossType())} - def __init__(self, weight=None): + def __init__(self, logits_ndim=2, weight=None, reduction='mean'): super().__init__() + if weight: weight = torch.FloatTensor(weight).to(self._device) - self._criterion = nn.CrossEntropyLoss(weight=weight) + self._criterion = nn.CrossEntropyLoss(weight=weight, reduction=reduction) + self._logits_dim = logits_ndim + + def _loss_function(self, logits, labels, loss_mask=None): + """ + Args: + logits (float): output of the classifier + labels (long): ground truth labels + loss_mask (bool/float/int): tensor to specify the masking + """ + logits_flatten = torch.flatten(logits, start_dim=0, end_dim=-2) + labels_flatten = torch.flatten(labels, start_dim=0, end_dim=-1) + + if loss_mask is not None: + if loss_mask.dtype is not torch.bool: + loss_mask = loss_mask > 0.5 + loss_mask_flatten = torch.flatten(loss_mask, start_dim=0, end_dim=-1) + logits_flatten = logits_flatten[loss_mask_flatten] + labels_flatten = labels_flatten[loss_mask_flatten] - def _loss_function(self, logits, labels): - loss = self._criterion(logits, labels) + if len(labels_flatten) == 0: + return self._criterion(logits, torch.argmax(logits, dim=-1)) + + loss = self._criterion(logits_flatten, labels_flatten) return loss class MSELoss(LossNM): @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. @@ -148,6 +183,7 @@ def input_ports(self): } @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. @@ -156,10 +192,121 @@ def output_ports(self): """ return {"loss": NeuralType(elements_type=LossType())} - def __init__(self): + def __init__(self, reduction='mean'): super().__init__() - self._criterion = nn.MSELoss() + self._criterion = nn.MSELoss(reduction=reduction) def _loss_function(self, preds, labels): loss = self._criterion(preds, labels) return loss + + +class LossAggregatorNM(LossNM): + """ + Neural module which combines sums several losses into one. + + Args: + num_inputs (int): number of input losses + weights (list of floats): a list of coefficient for merging losses + """ + + @property + def input_ports(self): + """Returns definitions of module input ports. + + """ + input_ports = {} + for i in range(self._num_losses): + input_ports["loss_" + str(i + 1)] = NeuralType(elements_type=LossType()) + + return input_ports + + @property + def output_ports(self): + """Returns definitions of module output ports. + + loss: + NeuralType(None) + """ + return {"loss": NeuralType(elements_type=LossType())} + + def __init__(self, num_inputs=2, weights=None): + # Store number of inputs/losses. 
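`CrossEntropyLossNM` (and `BCEWithLogitsLossNM` below) flatten the logits and labels and, when a `loss_mask` is given, keep only the unmasked positions before applying the criterion. The same masking in plain PyTorch, as a quick worked example:

```python
import torch
import torch.nn.functional as F

batch, time, num_classes = 2, 5, 7
logits = torch.randn(batch, time, num_classes)
labels = torch.randint(0, num_classes, (batch, time))
loss_mask = torch.zeros(batch, time, dtype=torch.bool)
loss_mask[:, :3] = True  # only the first three time steps contribute to the loss

logits_flat = torch.flatten(logits, start_dim=0, end_dim=-2)  # (B*T, D)
labels_flat = torch.flatten(labels, start_dim=0, end_dim=-1)  # (B*T,)
mask_flat = torch.flatten(loss_mask, start_dim=0, end_dim=-1)

loss = F.cross_entropy(logits_flat[mask_flat], labels_flat[mask_flat])
```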
+ self._num_losses = num_inputs + if weights is not None and len(weights) != num_inputs: + raise ValueError("Length of weights should be equal to the number of inputs (num_inputs)") + + self._weights = weights + LossNM.__init__(self) + + def _loss_function(self, **kwargs): + values = [kwargs[x] for x in sorted(kwargs.keys())] + loss = torch.zeros_like(values[0]) + for loss_idx, loss_value in enumerate(values): + if self._weights is not None: + loss = loss.add(loss_value, alpha=self._weights[loss_idx]) + else: + loss = loss.add(loss_value) + return loss + + +class BCEWithLogitsLossNM(LossNM): + """ + CrossEntropyLoss + Args: + logits_ndim (int): number of dimensions (or rank) of the logits tensor + weight (list): list of rescaling weight given to each class + reduction (str): type of the reduction over the batch + """ + + @property + @add_port_docs() + def input_ports(self): + """Returns definitions of module input ports. + """ + return { + "logits": NeuralType(['B'] + ['ANY'] * (self._logits_dim - 1), LogitsType()), + "labels": NeuralType(['B'] + ['ANY'] * (self._logits_dim - 2), LabelsType()), + "loss_mask": NeuralType(['B'] + ['ANY'] * (self._logits_dim - 2), MaskType(), optional=True), + } + + @property + @add_port_docs() + def output_ports(self): + """Returns definitions of module output ports. + + loss: + NeuralType(None) + """ + return {"loss": NeuralType(elements_type=LossType())} + + def __init__(self, logits_ndim=2, weight=None, reduction='mean'): + super().__init__() + + if weight: + weight = torch.FloatTensor(weight).to(self._device) + self._criterion = nn.BCEWithLogitsLoss(weight=weight, reduction=reduction) + self._logits_dim = logits_ndim + + def _loss_function(self, logits, labels, loss_mask=None): + """ + Args: + logits (float): output of the classifier + labels (long): ground truth labels + loss_mask (bool/float/int): tensor to specify the masking + """ + logits_flatten = torch.flatten(logits, start_dim=0, end_dim=-2) + labels_flatten = torch.flatten(labels, start_dim=0, end_dim=-1) + + if loss_mask is not None: + if loss_mask.dtype is not torch.bool: + loss_mask = loss_mask > 0.5 + loss_mask_flatten = torch.flatten(loss_mask, start_dim=0, end_dim=-1) + logits_flatten = logits_flatten[loss_mask_flatten] + labels_flatten = labels_flatten[loss_mask_flatten] + + if len(labels_flatten) == 0: + return 0 + + loss = self._criterion(logits_flatten, labels_flatten) + return loss diff --git a/nemo/backends/pytorch/common/multi_data.py b/nemo/backends/pytorch/common/multi_data.py new file mode 100644 index 000000000000..b51b23587aa1 --- /dev/null +++ b/nemo/backends/pytorch/common/multi_data.py @@ -0,0 +1,134 @@ +# ! /usr/bin/python +# -*- coding: utf-8 -*- + +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================= + +from enum import Enum +from typing import List + +import numpy as np +import torch + +from nemo import logging +from nemo.backends.pytorch.nm import DataLayerNM +from nemo.core.neural_types import * + +__all__ = ['MultiDataLayer', 'DataCombination'] + + +class DataCombination(Enum): + CROSSPRODUCT = 1 + ZIP = 2 + + +class MultiDataLayer(DataLayerNM): + def __init__( + self, + data_layers: List[DataLayerNM], + batch_size: int, + shuffle: bool = False, + combination_mode: DataCombination = DataCombination.CROSSPRODUCT, + port_names: List[str] = None, + ): + """ + data_layers: (list) of DataLayerNM objects + batch_size: (int) batchsize when the underlying dataset is loaded + combination_mode: (DataCombination) defines how to combine the datasets. + shuffle: (bool) whether underlying multi dataset should be shuffled in each epoch + port_names: List(str) user can override all port names if specified + """ + super().__init__() + self._data_layers = data_layers + self._batch_size = batch_size + self._shuffle = shuffle + self._combination_mode = combination_mode + self._port_names = port_names + self._dataset = MultiDataset( + datasets=[dl.dataset for dl in self._data_layers], combination_mode=combination_mode + ) + + self._ports = dict() + if self._port_names: + i = 0 + for dl in self._data_layers: + for _, port_type in dl.output_ports.items(): + self._ports[self._port_names[i]] = port_type + i += 1 + else: + for dl_idx, dl in enumerate(self._data_layers): + for port_name, port_type in dl.output_ports.items(): + if port_name in self._ports: + logging.warning(f"name collision {port_name}, will rename") + self._ports[f"{port_name}_{dl_idx}"] = port_type + else: + self._ports[port_name] = port_type + + @property + def output_ports(self): + """Return: dict + Returns union of all individual data_layer output ports + In case of name collision, resolve by renaming + """ + return self._ports + + def __len__(self): + return len(self._dataset) + + @property + def dataset(self): + return self._dataset + + @property + def data_iterator(self): + return None + + +class MultiDataset(torch.utils.data.Dataset): + def __init__( + self, + datasets: List[torch.utils.data.Dataset], + combination_mode: DataCombination = DataCombination.CROSSPRODUCT, + ): + """ + Datasets: list of torch.utils.data.Dataset objects. + combination_mode: DataCombination, defines how to combine the datasets, Options are [DataCombination.CROSSPRODUCT, DataCombination.ZIP]. + """ + self.datasets = datasets + self.combination_mode = combination_mode + if self.combination_mode == DataCombination.CROSSPRODUCT: + self.len = np.prod([len(d) for d in self.datasets]) + elif self.combination_mode == DataCombination.ZIP: + ds_lens = [len(d) for d in self.datasets] + self.len = np.min(ds_lens) + if len(set(ds_lens)) != 1: + raise ValueError("datasets do not have equal lengths.") + else: + raise ValueError("combination_mode unknown") + + def __getitem__(self, i): + """ + Returns list [x1, x2, ...xn] where x1 \in D1, x2 \in D2, ..., xn \in Dn + """ + + return [x for d in self.datasets for x in d[i % len(d)]] + + def __len__(self): + """ + Returns length of this dataset (int). + In case of DataCombination.CROSSPRODUCT this would be prod(len(d) for d in self.datasets). + In case of DataCombination.ZIP this would be min(len(d) for d in self.datasets) given that all datasets have same length. 
+ """ + return self.len diff --git a/nemo/backends/pytorch/common/rnn.py b/nemo/backends/pytorch/common/rnn.py index ca67154786d0..c1c62ac08c35 100644 --- a/nemo/backends/pytorch/common/rnn.py +++ b/nemo/backends/pytorch/common/rnn.py @@ -23,6 +23,7 @@ from nemo.backends.pytorch.common.parts import Attention from nemo.backends.pytorch.nm import TrainableNM from nemo.core import * +from nemo.utils.decorators import add_port_docs from nemo.utils.misc import pad_to __all__ = ['DecoderRNN', 'EncoderRNN'] @@ -65,28 +66,22 @@ class DecoderRNN(TrainableNM): """ @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. """ return { - # 'targets': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), 'targets': NeuralType(('B', 'T'), LabelsType()), - # 'encoder_outputs': NeuralType( - # {0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag),}, optional=True, - # ), 'encoder_outputs': NeuralType(('B', 'T', 'D'), ChannelType(), True), } @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. """ return { - # 'log_probs': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag),}), 'log_probs': NeuralType(('B', 'T', 'D'), LogprobsType()), - # 'attention_weights': NeuralType( - # {0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(TimeTag),}, optional=True, - # ), 'attention_weights': NeuralType(('B', 'T', 'T'), ChannelType(), True), } @@ -203,23 +198,21 @@ class EncoderRNN(TrainableNM): """ Simple RNN-based encoder using GRU cells """ @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. """ return { - # 'inputs': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # 'input_lens': NeuralType({0: AxisType(BatchTag),}, optional=True), 'inputs': NeuralType(('B', 'T'), ChannelType()), 'input_lens': NeuralType(tuple('B'), LengthsType()), } @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. """ return { - # 'outputs': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), - # 'hidden': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), 'outputs': NeuralType(('B', 'T', 'D'), ChannelType()), 'hidden': NeuralType(('B', 'T', 'D'), ChannelType()), } diff --git a/nemo/backends/pytorch/common/search.py b/nemo/backends/pytorch/common/search.py index acaf32213016..b2fc2892e031 100644 --- a/nemo/backends/pytorch/common/search.py +++ b/nemo/backends/pytorch/common/search.py @@ -4,6 +4,7 @@ from nemo.backends.pytorch.nm import NonTrainableNM from nemo.core.neural_types import ChannelType, NeuralType +from nemo.utils.decorators import add_port_docs INF = float('inf') BIG_NUM = 1e4 @@ -29,6 +30,7 @@ class GreedySearch(NonTrainableNM): """ @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. """ @@ -40,6 +42,7 @@ def input_ports(self): } @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. 
diff --git a/nemo/backends/pytorch/module_wrapper.py b/nemo/backends/pytorch/module_wrapper.py index 1a5de7595d6a..cbd23b38ca55 100644 --- a/nemo/backends/pytorch/module_wrapper.py +++ b/nemo/backends/pytorch/module_wrapper.py @@ -47,12 +47,6 @@ def __call__(self, force_pt=False, *input, **kwargs): else: return NeuralModule.__call__(self, **kwargs) - def get_weights(self): - result = dict() - for name, parameter in self.named_parameters(): - result[name] = (parameter, parameter.requires_grad) - return result - def save_to(self, path): t.save(self._pt_module.state_dict(), path) @@ -82,6 +76,8 @@ def get_weights(self): return result def set_weights(self, name2weight, name2name_and_transform=None): + if name2name_and_transform: + raise NotImplementedError("Transforms are not currently supported for set_weights") self._pt_module.load_state_dict({key: name2weight[key][0] for key in name2weight.keys()}) def tie_weights_with(self, module, weight_names): diff --git a/nemo/backends/pytorch/nm.py b/nemo/backends/pytorch/nm.py index 5da1a861e903..0ed8e4ee66de 100644 --- a/nemo/backends/pytorch/nm.py +++ b/nemo/backends/pytorch/nm.py @@ -1,4 +1,19 @@ -# Copyright (c) 2019 NVIDIA Corporation +# ! /usr/bin/python +# -*- coding: utf-8 -*- + +# Copyright (c) 2019-, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import os from abc import abstractmethod from typing import Dict, List, Optional, Set, Tuple @@ -6,8 +21,8 @@ import torch as t import torch.nn as nn -from ...core import DeviceType, NeuralModule, WeightShareTransform -from ...utils.helpers import get_cuda_device, rgetattr, rsetattr +from nemo.core import DeviceType, ModuleType, NeuralModule, WeightShareTransform +from nemo.utils.helpers import get_cuda_device, rgetattr, rsetattr class TrainableNM(NeuralModule, nn.Module): @@ -34,10 +49,16 @@ def __init__(self): """ - def __init__(self, pretrained_model_name=None): - - NeuralModule.__init__(self) # For NeuralModule API + def __init__(self, pretrained_model_name=None, name=None): + # Initialize nn.Module first - important for the inspect during the init_params collection. nn.Module.__init__(self) # For PyTorch API + NeuralModule.__init__(self, name) # For NeuralModule API + + # Unfrozen by default. + self._frozen = False + + # Set module type. + self._type = ModuleType.trainable self._device = get_cuda_device(self.placement) @@ -64,7 +85,7 @@ def set_weights(self, name2weight, name2name_and_transform=None): if name2name_and_transform is None: self.load_state_dict({key: name2weight[key][0] for key in name2weight.keys()}) else: - self.load_state_dict({key: name2weight[key][0] for key in name2weight.keys()}) + raise NotImplementedError("Transforms are not currently supported for set_weights") @t.jit.ignore def tie_weights_with(self, module, weight_names, name2name_and_transform=None): @@ -112,6 +133,8 @@ def freeze(self, weights=None): for name, param in self.named_parameters(): if weights is None or name in weights: param.requires_grad = False + # Freeze. 
+ self._frozen = True @t.jit.ignore def unfreeze(self, weights=None): @@ -123,6 +146,15 @@ def unfreeze(self, weights=None): for name, param in self.named_parameters(): if weights is None or name in weights: param.requires_grad = True + # Unfreeze. + self._frozen = False + + @t.jit.ignore + def is_frozen(self) -> bool: + """ Returns: + True/False depending whether there are any frozen weights or not. + """ + return self._frozen @property def num_weights(self): @@ -130,9 +162,11 @@ def num_weights(self): class NonTrainableNM(NeuralModule): - def __init__(self): - NeuralModule.__init__(self) # For NeuralModule API + def __init__(self, name=None): + NeuralModule.__init__(self, name) # For NeuralModule API self._device = get_cuda_device(self.placement) + # Set module type. + self._type = ModuleType.nontrainable def __call__(self, force_pt=False, *input, **kwargs): pt_call = len(input) > 0 or force_pt @@ -170,7 +204,7 @@ def tie_weights_with( def save_to(self, path: str): pass - def restore_from(self, path: str): + def restore_from(self, path: str, local_rank: int = 0): pass def freeze(self, weights: Set[str] = None): @@ -190,12 +224,16 @@ class DataLayerNM(NeuralModule): data_iterator property to return iterator over the dataset. """ - def __init__(self): - NeuralModule.__init__(self) # For NeuralModule API + def __init__(self, name=None): + NeuralModule.__init__(self, name) # For NeuralModule API + + # Set module type. + self._type = ModuleType.datalayer + self._device = get_cuda_device(self.placement) # if 'batch_size' not in kwargs: - # nemo.logging.warning("No batch_size specified in the data layer. " + # logging.warning("No batch_size specified in the data layer. " # "Setting batch_size to 1.") # kwargs['batch_size'] = 1 @@ -206,6 +244,7 @@ def __init__(self): self._batch_size = 1 self._num_workers = os.cpu_count() # Use all CPUs by default. self._shuffle = False # Don't shuffle by default. + self._pin_memory = False @property def input_ports(self): @@ -217,46 +256,46 @@ def input_ports(self): return {} def get_weights(self): - # nemo.logging.warning( + # logging.warning( # "Data Layer does not have any weights to return. " # "This get_weights call returns None." # ) return None def set_weights(self, name2weight: Dict[(str, bool)], name2name_and_transform): - # nemo.logging.warning( + # logging.warning( # "Data Layer does not have any weights to set. " # "This set_weights call is ignored." # ) return None def tie_weights_with(self, module, weight_names): - # nemo.logging.warning( + # logging.warning( # "Data Layer does not have any weights to tie. " # "This tie_weights_with call is ignored." # ) return None def save_to(self, path): - # nemo.logging.warning( + # logging.warning( # "Data Layer does not have any state to save. " # "This save_to call is ignored." # ) return None def restore_from(self, path): - raise NotImplementedError("Data Layer could not be restored from any saved " "state.") + raise NotImplementedError("Data Layer could not be restored from any saved state.") return None def freeze(self, weights: Set[str] = None): - # nemo.logging.warning( + # logging.warning( # "Data Layer does not have any weights to freeze. " # "This freeze call is ignored." # ) return None def unfreeze(self, weights: Set[str] = None): - # nemo.logging.warning( + # logging.warning( # "Data Layer does not have any weights to unfreeze. " # "This unfreeze call is ignored." # ) @@ -319,56 +358,65 @@ def num_workers(self): # """ Property setting the number of workers. 
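`TrainableNM` now tracks a `_frozen` flag: `freeze()` sets it, `unfreeze()` clears it, and `is_frozen()` reports it. A hedged usage sketch with the toy `TaylorNet` module defined later in this diff; creating a `NeuralModuleFactory` first is assumed to be needed so the module can resolve its placement:

```python
import nemo
from nemo.backends.pytorch.tutorials import TaylorNet

nemo.core.NeuralModuleFactory()  # assumed: modules resolve placement through the default factory

net = TaylorNet(dim=4)
net.freeze()
assert net.is_frozen()                 # all parameters now have requires_grad == False

net.unfreeze(weights={"fc1.weight"})   # selectively re-enable one weight tensor
assert not net.is_frozen()             # unfreeze() clears the _frozen flag
```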
""" # self._num_workers = nw + @property + def pin_memory(self): + """ Property returning the pin memory flag. """ + return self._pin_memory + class LossNM(NeuralModule): """A helper Base class for creating Pytorch-based loss function modules. You must implement _loss_function method. """ - def __init__(self): - NeuralModule.__init__(self) # For NeuralModule API + def __init__(self, name=None): + NeuralModule.__init__(self, name) # For NeuralModule API + + # Set module type. + self._type = ModuleType.loss + self._device = get_cuda_device(self.placement) def get_weights(self): - # nemo.logging.warning( + # logging.warning( # "Loss function module does not have any weights " # "to return. This get_weights call returns None.") return None def set_weights(self, name2weight: Dict[(str, bool)], name2name_and_transform): - # nemo.logging.warning( + # logging.warning( # "Loss function module does not have any weights to set. " # "This set_weights call is ignored." # ) return None def tie_weights_with(self, module, weight_names): - # nemo.logging.warning( + # logging.warning( # "Loss function module does not have any weights to tie. " # "This tie_weights_with call is ignored." # ) return None def save_to(self, path): - # nemo.logging.warning( + # logging.warning( # "Loss function module does not have any state to save. " # "This save_to call is ignored." # ) return None def restore_from(self, path): - raise NotImplementedError("Loss function module could not be restored from " "any saved " "state.") + raise NotImplementedError("Loss function module could not be restored from any saved state.") return None def freeze(self, weights: Set[str] = None): - # nemo.logging.warning( + # logging.warning( # "Loss function module does not have any weights to freeze. " # "This freeze call is ignored." # ) return None def unfreeze(self, weights: Set[str] = None): - # nemo.logging.warning( + # logging.warning( # "Loss function module does not have any weights to " # "unfreeze. This unfreeze call is ignored." # ) diff --git a/nemo/backends/pytorch/optimizers.py b/nemo/backends/pytorch/optimizers.py index a9977b4ae365..5eea99728b4d 100644 --- a/nemo/backends/pytorch/optimizers.py +++ b/nemo/backends/pytorch/optimizers.py @@ -71,7 +71,7 @@ def step(self, closure=None): continue grad = p.grad.data if grad.is_sparse: - raise RuntimeError("Adam does not support sparse gradients, please " "consider SparseAdam instead") + raise RuntimeError("Adam does not support sparse gradients, please consider SparseAdam instead") amsgrad = group["amsgrad"] state = self.state[p] diff --git a/nemo/backends/pytorch/torchvision/data/image_folder.py b/nemo/backends/pytorch/torchvision/data/image_folder.py index 5c4946b5cdd5..3cb3eaa8344d 100644 --- a/nemo/backends/pytorch/torchvision/data/image_folder.py +++ b/nemo/backends/pytorch/torchvision/data/image_folder.py @@ -3,6 +3,7 @@ from .....core import * from ...nm import DataLayerNM +from nemo.utils.decorators import add_port_docs class ImageFolderDataLayer(DataLayerNM): @@ -10,32 +11,22 @@ class ImageFolderDataLayer(DataLayerNM): NeuralModule.""" @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. 
- - image: - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag) - - 2: AxisType(HeightTag, input_size) - - 3: AxisType(WidthTag, input_size) - - - label: - 0: AxisType(BatchTag) """ return { - "image": NeuralType( - { - 0: AxisType(BatchTag), - 1: AxisType(ChannelTag), - 2: AxisType(HeightTag, self._input_size), - 3: AxisType(WidthTag, self._input_size), - } - ), - "label": NeuralType({0: AxisType(BatchTag)}), + # "image": NeuralType( + # { + # 0: AxisType(BatchTag), + # 1: AxisType(ChannelTag), + # 2: AxisType(HeightTag, self._input_size), + # 3: AxisType(WidthTag, self._input_size), + # } + # ), + # "label": NeuralType({0: AxisType(BatchTag)}), + "image": NeuralType(elements_type=ChannelType(), axes=('B', 'C', 'H', 'W')), + "label": NeuralType(elements_type=LogitsType(), axes=tuple('B')), } def __init__(self, batch_size, path, input_size=32, shuffle=True, is_eval=False): diff --git a/nemo/backends/pytorch/tutorials/chatbot/modules.py b/nemo/backends/pytorch/tutorials/chatbot/modules.py index 14d704b4d4fc..2459afa158b0 100644 --- a/nemo/backends/pytorch/tutorials/chatbot/modules.py +++ b/nemo/backends/pytorch/tutorials/chatbot/modules.py @@ -12,12 +12,14 @@ from .....core.neural_types import * from ...nm import DataLayerNM, LossNM, TrainableNM from ..chatbot import data +from nemo.utils.decorators import add_port_docs class DialogDataLayer(DataLayerNM): """Class representing data layer for a chatbot.""" @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. """ @@ -71,6 +73,7 @@ class EncoderRNN(TrainableNM): """ @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. """ @@ -80,6 +83,7 @@ def input_ports(self): } @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. """ @@ -131,6 +135,7 @@ def forward(self, input_seq, input_lengths, hidden=None): class LuongAttnDecoderRNN(TrainableNM): @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. """ @@ -141,6 +146,7 @@ def input_ports(self): } @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. @@ -269,6 +275,7 @@ def forward(self, targets, encoder_outputs, max_target_len): class MaskedXEntropyLoss(LossNM): @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. """ @@ -279,6 +286,7 @@ def input_ports(self): } @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. @@ -306,12 +314,14 @@ def _loss_function(self, **kwargs): class GreedyLuongAttnDecoderRNN(TrainableNM): @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. """ return {"encoder_outputs": NeuralType(('T', 'B', 'D'), ChannelType())} @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. 
""" diff --git a/nemo/backends/pytorch/tutorials/toys.py b/nemo/backends/pytorch/tutorials/toys.py index 442c841ee836..3a803526caed 100644 --- a/nemo/backends/pytorch/tutorials/toys.py +++ b/nemo/backends/pytorch/tutorials/toys.py @@ -5,16 +5,16 @@ import torch.nn as nn import torch.utils.data as t_utils -from nemo import logging from nemo.backends.pytorch.nm import DataLayerNM, LossNM, TrainableNM -from nemo.core import DeviceType, NeuralModule from nemo.core.neural_types import * +from nemo.utils.decorators import add_port_docs class TaylorNet(TrainableNM): # Note inheritance from TrainableNM """Module which learns Taylor's coefficients.""" @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. @@ -24,6 +24,7 @@ def input_ports(self): return {"x": NeuralType(('B', 'D'), ChannelType())} @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. @@ -32,68 +33,25 @@ def output_ports(self): """ return {"y_pred": NeuralType(('B', 'D'), ChannelType())} - def __init__(self, dim): - # Part specific for Neural Modules API: - # (1) call base constructor - # (2) define input and output ports - super().__init__() - - # And of Neural Modules specific part. Rest is Pytorch code - self._dim = dim - self.fc1 = nn.Linear(self._dim, 1) - t.nn.init.xavier_uniform_(self.fc1.weight) - self._device = t.device("cuda" if self.placement == DeviceType.GPU else "cpu") - self.to(self._device) - - # IMPORTANT: input arguments to forward must match input input ports' names - def forward(self, x): - lst = [] - for pw in range(self._dim): - lst.append(x ** pw) - nx = t.cat(lst, dim=-1) - return self.fc1(nx) - - -class TaylorNetO(TrainableNM): # Note inheritance from TrainableNM - """Module which learns Taylor's coefficients.""" - - @property - def input_ports(self): - """Returns definitions of module input ports. - + def __init__(self, dim, name=None): """ - return { - "x": NeuralType(('B', 'D'), ChannelType()), - "o": NeuralType(('B', 'D'), ChannelType()), - } + Creates TaylorNet object. - @property - def output_ports(self): - """Returns definitions of module output ports. + Args: + dim: Number of dimensions (number of terms in Taylor series). + name: Name of the module instance """ - return {"y_pred": NeuralType(('B', 'D'), ChannelType(), optional=True)} - - def __init__(self, dim): - # Part specific for Neural Modules API: - # (1) call base constructor - # (2) define input and output ports - super().__init__() + super().__init__(name=name) # And of Neural Modules specific part. 
Rest is Pytorch code self._dim = dim self.fc1 = nn.Linear(self._dim, 1) t.nn.init.xavier_uniform_(self.fc1.weight) - self._device = t.device("cuda" if self.placement == DeviceType.GPU else "cpu") self.to(self._device) # IMPORTANT: input arguments to forward must match input input ports' names - # If port is Optional, the default value should be None - def forward(self, x, o=None): + def forward(self, x): lst = [] - if o is None: - logging.debug("O is None") - else: - logging.debug("O is not None") for pw in range(self._dim): lst.append(x ** pw) nx = t.cat(lst, dim=-1) @@ -119,6 +77,7 @@ def __len__(self): return self._n @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports """ @@ -127,7 +86,7 @@ def output_ports(self): "y": NeuralType(('B', 'D'), LabelsType()), } - def __init__(self, batch_size, f_name="sin", n=1000, x_lo=-4, x_hi=4): + def __init__(self, batch_size, f_name="sin", n=1000, x_lo=-4, x_hi=4, name=None): """ Creates a datalayer returning (x-y) pairs, with n points from a given range. @@ -137,8 +96,9 @@ def __init__(self, batch_size, f_name="sin", n=1000, x_lo=-4, x_hi=4): n: number of points x_lo: lower boundary along x axis x_hi: higher boundary along x axis + name: Name of the module instance """ - super().__init__() + super().__init__(name=name) # Dicionary with handled functions. handled_funcs = {"sin": t.sin, "cos": t.cos} @@ -148,14 +108,11 @@ def __init__(self, batch_size, f_name="sin", n=1000, x_lo=-4, x_hi=4): self._n = n self._batch_size = batch_size - self._device = t.device("cuda" if self.placement == DeviceType.GPU else "cpu") - x_data = t.tensor(np.random.uniform(low=x_lo, high=x_hi, size=self._n)).unsqueeze(-1).to(self._device) + x_data = t.tensor(np.random.uniform(low=x_lo, high=x_hi, size=self._n)).unsqueeze(-1) y_data = func(x_data) - - self._data_iterator = t_utils.DataLoader( - t_utils.TensorDataset(x_data.float(), y_data.float()), batch_size=self._batch_size, - ) + self._dataset = t_utils.TensorDataset(x_data.float(), y_data.float()) + self._data_iterator = t_utils.DataLoader(self._dataset, batch_size=self._batch_size,) @property def data_iterator(self): @@ -163,11 +120,12 @@ def data_iterator(self): @property def dataset(self): - return None + return self._dataset class MSELoss(LossNM): @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. @@ -187,13 +145,14 @@ def input_ports(self): } @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. """ return {"loss": NeuralType(elements_type=LossType())} - def __init__(self): - super().__init__() + def __init__(self, name=None): + super().__init__(name=name) self._criterion = nn.MSELoss() def _loss_function(self, **kwargs): @@ -202,6 +161,7 @@ def _loss_function(self, **kwargs): class L1Loss(LossNM): @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. """ @@ -211,13 +171,14 @@ def input_ports(self): } @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. """ return {"loss": NeuralType(elements_type=LossType())} - def __init__(self): - super().__init__() + def __init__(self, name=None): + super().__init__(name=name) self._criterion = nn.L1Loss() def _loss_function(self, **kwargs): @@ -226,6 +187,7 @@ def _loss_function(self, **kwargs): class CrossEntropyLoss(LossNM): @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. 
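Taken together, the reworked tutorial modules keep the classic toy regression graph intact while letting every instance be named explicitly. A sketch following the standard NeMo getting-started example (the port keywords `x`, `predictions` and `target` come from the modules' port definitions; hyperparameters are illustrative):

```python
import nemo

nf = nemo.core.NeuralModuleFactory()

# Instantiate the tutorial modules, now with explicit instance names.
dl = nemo.tutorials.RealFunctionDataLayer(n=10000, batch_size=128, name="sin_data")
fx = nemo.tutorials.TaylorNet(dim=4, name="taylor_net")
loss = nemo.tutorials.MSELoss(name="mse_loss")

# Describe the activation flow; keyword names must match the input port names.
x, y = dl()
p = fx(x=x)
lss = loss(predictions=p, target=y)

nf.train(tensors_to_optimize=[lss], optimizer="sgd", optimization_params={"num_epochs": 3, "lr": 0.0003})
```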
""" @@ -235,6 +197,7 @@ def input_ports(self): } @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. @@ -243,10 +206,8 @@ def output_ports(self): """ return {"loss": NeuralType(elements_type=LossType())} - def __init__(self): - # Neural Module API specific - NeuralModule.__init__(self) - # End of Neural Module API specific + def __init__(self, name=None): + super().__init__(name=name) self._criterion = nn.CrossEntropyLoss() # You need to implement this function diff --git a/nemo/backends/torch_backend.py b/nemo/backends/torch_backend.py new file mode 100644 index 000000000000..cbc1730a2275 --- /dev/null +++ b/nemo/backends/torch_backend.py @@ -0,0 +1,71 @@ +# ============================================================================= +# Copyright (c) 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +from os.path import expanduser +from typing import Any, Dict + +import torch + + +def save(checkpoint: Dict[str, Any], filename: str) -> None: + """ + A proxy function that saves the checkpoint to a given file. + + Args: + checkpoint: Checkpoint to be stored. + filename: Name of the file containing checkpoint. + """ + # Get the absolute path and save. + abs_filename = expanduser(filename) + torch.save(checkpoint, abs_filename) + + +def load(filename: str) -> Dict[str, Any]: + """ + A proxy function that loads checkpoint from a given file. + + Args: + filename: Name of the file containing checkpoint. + Returns: + Loaded checkpoint. + """ + # Get the absolute path and save. + abs_filename = expanduser(filename) + # Use map location to be able to load CUDA-trained modules on CPU. + return torch.load(abs_filename, map_location=lambda storage, loc: storage) + + +def get_state_dict(model: torch.nn.Module) -> Dict[str, Any]: + """ + A proxy function that gets the state dictionary. + + Args: + model: Torch model. + Returns: + State dictionary containing model weights. + """ + return model.state_dict() + + +def set_state_dict(model: torch.nn.Module, state_dict: Dict[str, Any]) -> None: + """ + A proxy function that sets the state dictionary. + + Args: + model: Torch model. + state_dict: State dictionary containing model weights. + """ + model.load_state_dict(state_dict) diff --git a/nemo/collections/asr/__init__.py b/nemo/collections/asr/__init__.py index f5c858923620..8068b6664c2d 100644 --- a/nemo/collections/asr/__init__.py +++ b/nemo/collections/asr/__init__.py @@ -12,23 +12,40 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================= -from .audio_preprocessing import * -from .beam_search_decoder import BeamSearchDecoderWithLM -from .data_layer import AudioToTextDataLayer, KaldiFeatureDataLayer, TranscriptDataLayer -from .greedy_ctc_decoder import GreedyCTCDecoder -from .jasper import JasperDecoderForCTC, JasperEncoder -from .las.misc import JasperRNNConnector -from .losses import CTCLossNM +from nemo.backends.pytorch.common.losses import CrossEntropyLossNM +from nemo.collections.asr import models +from nemo.collections.asr.audio_preprocessing import * +from nemo.collections.asr.beam_search_decoder import BeamSearchDecoderWithLM +from nemo.collections.asr.contextnet import ContextNetDecoderForCTC, ContextNetEncoder +from nemo.collections.asr.data_layer import ( + AudioToSpeechLabelDataLayer, + AudioToTextDataLayer, + KaldiFeatureDataLayer, + TarredAudioToTextDataLayer, + TranscriptDataLayer, +) +from nemo.collections.asr.greedy_ctc_decoder import GreedyCTCDecoder +from nemo.collections.asr.jasper import ( + JasperDecoderForClassification, + JasperDecoderForCTC, + JasperDecoderForSpkrClass, + JasperEncoder, +) +from nemo.collections.asr.las.misc import JasperRNNConnector +from nemo.collections.asr.losses import CTCLossNM from nemo.core import Backend __all__ = [ 'Backend', 'AudioToTextDataLayer', + 'TarredAudioToTextDataLayer', + 'AudioToSpeechLabelDataLayer', 'AudioPreprocessing', 'AudioPreprocessor', 'AudioToMFCCPreprocessor', 'AudioToMelSpectrogramPreprocessor', 'AudioToSpectrogramPreprocessor', + 'CropOrPadSpectrogramAugmentation', 'MultiplyBatch', 'SpectrogramAugmentation', 'KaldiFeatureDataLayer', @@ -37,8 +54,13 @@ 'BeamSearchDecoderWithLM', 'JasperEncoder', 'JasperDecoderForCTC', + 'JasperDecoderForClassification', + 'JasperDecoderForSpkrClass', 'JasperRNNConnector', + 'ContextNetEncoder', + 'ContextNetDecoderForCTC', 'CTCLossNM', + 'CrossEntropyLossNM', ] backend = Backend.PyTorch diff --git a/nemo/collections/asr/audio_preprocessing.py b/nemo/collections/asr/audio_preprocessing.py index 945f4383caac..54f3df7e8f0c 100644 --- a/nemo/collections/asr/audio_preprocessing.py +++ b/nemo/collections/asr/audio_preprocessing.py @@ -21,33 +21,44 @@ 'AudioToMFCCPreprocessor', 'AudioToMelSpectrogramPreprocessor', 'AudioToSpectrogramPreprocessor', + 'CropOrPadSpectrogramAugmentation', 'MultiplyBatch', 'SpectrogramAugmentation', + 'TimeStretchAugmentation', ] import math -import warnings from abc import abstractmethod +import numpy as np import torch +from packaging import version from .parts.features import FilterbankFeatures from .parts.spectr_augment import SpecAugment, SpecCutout from nemo.backends.pytorch import NonTrainableNM from nemo.core import Optimization from nemo.core.neural_types import * +from nemo.utils import logging +from nemo.utils.decorators import add_port_docs try: import torchaudio + import torchaudio.transforms + import torchaudio.functional + + TORCHAUDIO_VERSION = version.parse(torchaudio.__version__) + TORCHAUDIO_VERSION_MIN = version.parse('0.5') HAVE_TORCHAUDIO = True except ModuleNotFoundError: HAVE_TORCHAUDIO = False - warnings.warn('Could not import torchaudio. Some features might not work.') + logging.warning('Could not import torchaudio. Some features might not work.') + try: from apex import amp except (AttributeError, ModuleNotFoundError) as e: - warnings.warn("Unable to import APEX. Mixed precision and distributed training " "will not work.") + logging.warning("Unable to import APEX. 
Mixed precision and distributed training will not work.") class AudioPreprocessor(NonTrainableNM): @@ -119,6 +130,7 @@ class AudioToSpectrogramPreprocessor(AudioPreprocessor): """ @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. """ @@ -130,6 +142,7 @@ def input_ports(self): } @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. """ @@ -269,6 +282,7 @@ class AudioToMelSpectrogramPreprocessor(AudioPreprocessor): """ @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. """ @@ -280,6 +294,7 @@ def input_ports(self): } @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. @@ -415,6 +430,7 @@ class AudioToMFCCPreprocessor(AudioPreprocessor): """ @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. """ @@ -426,6 +442,7 @@ def input_ports(self): } @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. """ @@ -545,6 +562,7 @@ class SpectrogramAugmentation(NonTrainableNM): """ @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. """ @@ -555,6 +573,7 @@ def input_ports(self): } @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. """ @@ -608,6 +627,7 @@ class MultiplyBatch(NonTrainableNM): """ @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. """ @@ -623,6 +643,7 @@ def input_ports(self): } @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. """ @@ -651,6 +672,253 @@ def forward(self, in_x, in_x_len, in_y, in_y_len): return out_x, out_x_len, out_y, out_y_len +class CropOrPadSpectrogramAugmentation(NonTrainableNM): + """ + Pad or Crop the incoming Spectrogram to a certain shape. + + Args: + audio_length (int): the final number of timesteps that is required. + The signal will be either padded or cropped temporally to this + size. + """ + + def __init__(self, audio_length, **kwargs): + super(CropOrPadSpectrogramAugmentation, self).__init__() + self.audio_length = audio_length + + @torch.no_grad() + def forward(self, input_signal, length): + image = input_signal + num_images = image.shape[0] + + audio_length = self.audio_length + image_len = image.shape[-1] + + # Crop long signal + if image_len > audio_length: # randomly slice + cutout_images = [] + offset = torch.randint(low=0, high=image_len - audio_length + 1, size=[num_images]) + + # TODO: Look into advanced broadcasting to speed up section + for idx, offset in enumerate(offset): + cutout_images.append(image[idx : idx + 1, :, offset : offset + audio_length]) + + image = torch.cat(cutout_images, dim=0) + del cutout_images + + else: # symmetrically pad short signal with zeros + pad_left = (audio_length - image_len) // 2 + pad_right = (audio_length - image_len) // 2 + + if (audio_length - image_len) % 2 == 1: + pad_right += 1 + + image = torch.nn.functional.pad(image, [pad_left, pad_right], mode="constant", value=0) + + # Replace dynamic length sequences with static number of timesteps + length = (length * 0) + audio_length + + return image, length + + @property + def input_ports(self): + """Returns definitions of module output ports. 
+ """ + return { + # "input_signal": NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(SpectrogramSignalTag), 2: AxisType(ProcessedTimeTag), } + # ), + # "length": NeuralType({0: AxisType(BatchTag)}), + "input_signal": NeuralType(('B', 'D', 'T'), SpectrogramType()), + "length": NeuralType(tuple('B'), LengthsType()), + } + + @property + def output_ports(self): + """Returns definitions of module output ports. + """ + return { + # "processed_signal": NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(SpectrogramSignalTag), 2: AxisType(ProcessedTimeTag), } + # ), + # "processed_length": NeuralType({0: AxisType(BatchTag)}), + "processed_signal": NeuralType(('B', 'D', 'T'), SpectrogramType()), + "processed_length": NeuralType(tuple('B'), LengthsType()), + } + + +class TimeStretchAugmentation(NonTrainableNM): + def __init__( + self, + sample_rate: int, + probability: float, + min_speed_rate: float = 0.9, + max_speed_rate: float = 1.1, + num_rates: int = 5, + n_fft: int = 512, + ): + """ + Time-stretch a batch of audio series by a fixed rate while preserving pitch. + + Note that while the speed rate is sampled independently for every batch, + all samples of that batch will be augmented by the same speed rate. + + Note: + This is a simplified implementation, intended primarily for reference and pedagogical purposes. + It makes no attempt to handle transients, and is likely to produce audible artifacts. + + Args: + sample_rate: Sampling rate. + probability: Float value declaring chance of the input being augmented. + Must be a float value in the range [0, 1]. + min_speed_rate: Minimum sampling rate modifier. + max_speed_rate: Maximum sampling rate modifier. + num_rates: Number of discrete rates to allow. Can be a positive or negative + integer. + If a positive integer greater than 0 is provided, the range of + speed rates will be discretized into `num_rates` values. + If a negative integer or 0 is provided, the full range of speed rates + will be sampled uniformly. + Note: If a positive integer is provided and the resultant discretized + range of rates contains the value '1.0', then those samples with rate=1.0, + will not be augmented at all and simply skipped. This is to avoid unnecessary + augmentation and increase computation time. Effective augmentation chance + in such a case is = `prob * (num_rates - 1 / num_rates) * 100`% chance + where `prob` is the global probability of a sample being augmented. + n_fft: Number of fft filters to be computed. + """ + super(TimeStretchAugmentation, self).__init__() + + if probability > 1.0 or probability < 0.0: + raise ValueError("`probability` must be between 0 and 1") + + if not HAVE_TORCHAUDIO: + raise ModuleNotFoundError( + "torchaudio is not installed but is necessary for " + "TimeStretchAugmentation. We recommend you try " + "installing it from conda for the PyTorch version you have." + ) + + # Check torchaudio version; inform user of potential issue + if TORCHAUDIO_VERSION < TORCHAUDIO_VERSION_MIN: + logging.error( + "Current installed version of `torchaudio` %s is less than the recommended minimum " + "version of %s. Please note that this may cause deadlocks when using distributed " + "data parallel training. 
Please follow the instructions at https://github.com/pytorch/audio " + "to update torchaudio.", + str(TORCHAUDIO_VERSION), + str(TORCHAUDIO_VERSION_MIN), + ) + + min_rate = min(min_speed_rate, max_speed_rate) + if min_rate < 0.0: + raise ValueError("Minimum sampling rate modifier must be > 0.") + + self._sample_rate = sample_rate + self.probability = float(probability) + self.min_rate = float(min_speed_rate) + self.max_rate = float(max_speed_rate) + self.num_rates = num_rates + if num_rates > 0: + self._rates = np.linspace(min_speed_rate, max_speed_rate, num_rates) + self._rng = np.random.RandomState() + + self._n_fft = n_fft + self._hop_length = n_fft // 2 + self._stft_window = torch.hann_window(self._n_fft, periodic=True, device=self._device) + self._phi_advance = torch.linspace(0, np.pi * self._hop_length, self._hop_length + 1, device=self._device) + self._phi_advance = self._phi_advance.view(-1, 1) + + @torch.no_grad() + def forward(self, input_signal, length): + proba = self._rng.uniform(0.0, 1.0) + + if proba > self.probability: + return input_signal, length + + # Select speed rate either from choice or random sample + if self.num_rates < 0: + speed_rate = self._rng.uniform(self.min_rate, self.max_rate) + else: + speed_rate = np.random.choice(self._rates) + + # Skip perturbation in case of identity speed rate + if speed_rate == 1.0: + return input_signal, length + + features = self._stft(input_signal, self._n_fft, self._hop_length) + features = self._phase_vocoder(features, speed_rate) + + # Predict the length of y_stretch + len_stretch = int(round(input_signal.shape[1] / speed_rate)) + + audio = self._istft(features, len_stretch) + + length = (length * speed_rate).type(torch.long) + + return audio, length + + def _stft(self, data: torch.Tensor, n_fft: int, hop_length: int): + win_length = n_fft + window = self._stft_window + + stft = torch.stft( + data, + n_fft=n_fft, + hop_length=hop_length, + win_length=win_length, + window=window, + center=True, + pad_mode='reflect', + normalized=False, + ) + return stft + + def _phase_vocoder(self, data: torch.Tensor, rate: float): + data_stretch = torchaudio.functional.phase_vocoder(data, rate, self._phi_advance) + return data_stretch + + def _istft(self, data: torch.Tensor, len_stretch: int): + n_fft = 2 * (data.shape[1] - 1) + hop_length = self._hop_length + win_length = n_fft + window = self._stft_window + + audio = torchaudio.functional.istft( + data, + n_fft, + hop_length, + win_length, + window=window, + center=True, + pad_mode='reflect', + normalized=False, + length=len_stretch, + ) + + return audio + + @property + @add_port_docs() + def input_ports(self): + """Returns definitions of module input ports. + """ + return { + "input_signal": NeuralType(('B', 'T'), AudioSignal(freq=self._sample_rate)), + "length": NeuralType(tuple('B'), LengthsType()), + } + + @property + @add_port_docs() + def output_ports(self): + """Returns definitions of module output ports. 
+ """ + return { + "processed_signal": NeuralType(('B', 'T'), AudioSignal(freq=self._sample_rate)), + "processed_length": NeuralType(tuple('B'), LengthsType()), + } + + def AudioPreprocessing(*args, **kwargs): raise NotImplementedError( "AudioPreprocessing has been deprecated and replaced by: " diff --git a/nemo/collections/asr/beam_search_decoder.py b/nemo/collections/asr/beam_search_decoder.py index ecebe7a00ec3..13640b2f476f 100644 --- a/nemo/collections/asr/beam_search_decoder.py +++ b/nemo/collections/asr/beam_search_decoder.py @@ -7,6 +7,7 @@ from nemo.backends.pytorch.nm import NonTrainableNM from nemo.core import DeviceType from nemo.core.neural_types import * +from nemo.utils.decorators import add_port_docs from nemo.utils.helpers import get_cuda_device @@ -19,26 +20,24 @@ class BeamSearchDecoderWithLM(NonTrainableNM): in that list is a tuple of (final_log_prob, hyp_string). Args: - vocab (list): List of characters that can be output by the ASR model. - For Jasper, this is the 28 character set {a-z '}. The CTC blank - symbol is automatically added later for models using ctc. - beam_width (int): Size of beams to keep and expand upon. Larger beams - result in more accurate but slower predictions - alpha (float): The amount of importance to place on the n-gram language - model. Larger alpha means more importance on the LM and less - importance on the acoustic model (Jasper). - beta (float): A penalty term given to longer word sequences. Larger - beta will result in shorter sequences. + vocab (list): List of characters that can be output by the ASR model. For Jasper, this is the 28 character set + {a-z '}. The CTC blank symbol is automatically added later for models using ctc. + beam_width (int): Size of beams to keep and expand upon. Larger beams result in more accurate but slower + predictions + alpha (float): The amount of importance to place on the n-gram language model. Larger alpha means more + importance on the LM and less importance on the acoustic model (Jasper). + beta (float): A penalty term given to longer word sequences. Larger beta will result in shorter sequences. lm_path (str): Path to n-gram language model num_cpus (int): Number of cpus to use - cutoff_prob (float): Cutoff probability in vocabulary pruning, - default 1.0, no pruning - cutoff_top_n (int): Cutoff number in pruning, only top cutoff_top_n - characters with highest probs in vocabulary will be used in - beam search, default 40. + cutoff_prob (float): Cutoff probability in vocabulary pruning, default 1.0, no pruning + cutoff_top_n (int): Cutoff number in pruning, only top cutoff_top_n characters with highest probs in + vocabulary will be used in beam search, default 40. + input_tensor (bool): Set to True if you intend to pass pytorch Tensors, set to False if you intend to pass + numpy arrays. """ @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. """ @@ -50,6 +49,7 @@ def input_ports(self): } @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. 
@@ -59,7 +59,9 @@ def output_ports(self): # return {"predictions": NeuralType(VoidType())} return {"predictions": NeuralType(('B', 'T'), PredictionsType())} - def __init__(self, vocab, beam_width, alpha, beta, lm_path, num_cpus, cutoff_prob=1.0, cutoff_top_n=40): + def __init__( + self, vocab, beam_width, alpha, beta, lm_path, num_cpus, cutoff_prob=1.0, cutoff_top_n=40, input_tensor=True + ): try: from ctc_decoders import Scorer @@ -86,12 +88,15 @@ def __init__(self, vocab, beam_width, alpha, beta, lm_path, num_cpus, cutoff_pro self.num_cpus = num_cpus self.cutoff_prob = cutoff_prob self.cutoff_top_n = cutoff_top_n + self.input_tensor = input_tensor def forward(self, log_probs, log_probs_length): - probs = torch.exp(log_probs) - probs_list = [] - for i, prob in enumerate(probs): - probs_list.append(prob[: log_probs_length[i], :]) + probs_list = log_probs + if self.input_tensor: + probs = torch.exp(log_probs) + probs_list = [] + for i, prob in enumerate(probs): + probs_list.append(prob[: log_probs_length[i], :]) res = self.beam_search_func( probs_list, self.vocab, diff --git a/nemo/collections/asr/contextnet.py b/nemo/collections/asr/contextnet.py new file mode 100644 index 000000000000..145a6d79718a --- /dev/null +++ b/nemo/collections/asr/contextnet.py @@ -0,0 +1,211 @@ +# Copyright (c) 2019 NVIDIA Corporation +from typing import Any, Dict, List, Optional + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from .jasper import JasperEncoder +from .parts.jasper import init_weights +from nemo.backends.pytorch.nm import TrainableNM +from nemo.core.neural_types import * +from nemo.utils import logging +from nemo.utils.decorators import add_port_docs + + +class ContextNetEncoder(JasperEncoder): + """ + ContextNet Encoder creates the pre-processing (prologue), QuartzNet convolution + block, and the additional pre and post processing layers as described in + ContextNet (https://arxiv.org/abs/2005.03191) + + Args: + jasper (list): A list of dictionaries. Each element in the list + represents the configuration of one Jasper Block. Each element + should contain:: + + { + # Required parameters + 'filters' (int) # Number of output channels, + 'repeat' (int) # Number of sub-blocks, + 'kernel' (int) # Size of conv kernel, + 'stride' (int) # Conv stride + 'dilation' (int) # Conv dilation + 'dropout' (float) # Dropout probability + 'residual' (bool) # Whether to use residual or not. + # Optional parameters + 'residual_dense' (bool) # Whether to use Dense Residuals + # or not. 'residual' must be True for 'residual_dense' + # to be enabled. + # Defaults to False. + 'separable' (bool) # Whether to use separable convolutions. + # Defaults to False + 'groups' (int) # Number of groups in each conv layer. + # Defaults to 1 + 'heads' (int) # Sharing of separable filters + # Defaults to -1 + 'tied' (bool) # Whether to use the same weights for all + # sub-blocks. + # Defaults to False + 'se' (bool) # Whether to add Squeeze and Excitation + # sub-blocks. + # Defaults to False + 'se_reduction_ratio' (int) # The reduction ratio of the Squeeze + # sub-module. + # Must be an integer > 1. + # Defaults to 8. + 'se_context_window' (int) # The size of the temporal context + # provided to SE sub-module. + # Must be an integer. If value <= 0, will perform global + # temporal pooling (global context). + # If value >= 1, will perform stride 1 average pooling to + # compute context window. + 'se_interpolation_mode' (str) # Interpolation mode of timestep dimension. + # Used only if context window is > 1. 
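Returning to the beam-search decoder hunk above, the new `input_tensor` flag lets callers hand over probabilities that were computed outside the graph. A hedged construction sketch (requires the `ctc_decoders` package and a real n-gram LM file; the path and decoding parameters are placeholders):

```python
import nemo
import nemo.collections.asr as nemo_asr

nf = nemo.core.NeuralModuleFactory()

labels = list("abcdefghijklmnopqrstuvwxyz '")
beam_search = nemo_asr.BeamSearchDecoderWithLM(
    vocab=labels, beam_width=128, alpha=2.0, beta=1.0,
    lm_path="lm.binary", num_cpus=4, input_tensor=False,
)
# With input_tensor=False, forward() expects a plain python list of per-utterance
# [time, vocab+1] probability matrices and passes it straight to the decoder,
# skipping the exp() and length-based slicing applied to padded torch tensors.
```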
+ # The modes available for resizing are: `nearest`, `linear` (3D-only), + # `bilinear`, `area` + 'kernel_size_factor' (float) # Conv kernel size multiplier + # Can be either an int or float + # Kernel size is recomputed as below: + # new_kernel_size = int(max(1, (kernel_size * kernel_width))) + # to prevent kernel sizes than 1. + # Note: If rescaled kernel size is an even integer, + # adds 1 to the rescaled kernel size to allow "same" + # padding. + 'stride_last' (bool) # Bool flag to determine whether each + # of the the repeated sub-blockss will perform a stride, + # or only the last sub-block will perform a strided convolution. + } + + activation (str): Activation function used for each sub-blocks. Can be + one of ["hardtanh", "relu", "selu", "swish"]. + feat_in (int): Number of channels being input to this module + normalization_mode (str): Normalization to be used in each sub-block. + Can be one of ["batch", "layer", "instance", "group"] + Defaults to "batch". + residual_mode (str): Type of residual connection. + Can be "add", "stride_add" or "max". + "stride_add" mode performs strided convolution prior to residual + addition. + Defaults to "add". + norm_groups (int): Number of groups for "group" normalization type. + If set to -1, number of channels is used. + Defaults to -1. + conv_mask (bool): Controls the use of sequence length masking prior + to convolutions. + Defaults to True. + frame_splicing (int): Defaults to 1. + init_mode (str): Describes how neural network parameters are + initialized. Options are ['xavier_uniform', 'xavier_normal', + 'kaiming_uniform','kaiming_normal']. + Defaults to "xavier_uniform". + """ + + length: Optional[torch.Tensor] + + @property + @add_port_docs() + def input_ports(self): + """Returns definitions of module input ports. + """ + return { + # "audio_signal": NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(SpectrogramSignalTag), 2: AxisType(ProcessedTimeTag),} + # ), + # "length": NeuralType({0: AxisType(BatchTag)}), + "audio_signal": NeuralType(('B', 'D', 'T'), SpectrogramType()), + "length": NeuralType(tuple('B'), LengthsType()), + } + + @property + @add_port_docs() + def output_ports(self): + """Returns definitions of module output ports. + """ + return { + # "outputs": NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(EncodedRepresentationTag), 2: AxisType(ProcessedTimeTag),} + # ), + # "encoded_lengths": NeuralType({0: AxisType(BatchTag)}), + "outputs": NeuralType(('B', 'D', 'T'), AcousticEncodedRepresentation()), + "encoded_lengths": NeuralType(tuple('B'), LengthsType()), + } + + def __init__( + self, + jasper: List[Dict[str, Any]], + activation: str, + feat_in: int, + normalization_mode: str = "batch", + residual_mode: str = "add", + norm_groups: int = -1, + conv_mask: bool = False, + frame_splicing: int = 1, + init_mode: str = 'xavier_uniform', + ): + super().__init__( + jasper=jasper, + activation=activation, + feat_in=feat_in, + normalization_mode=normalization_mode, + residual_mode=residual_mode, + norm_groups=norm_groups, + conv_mask=conv_mask, + frame_splicing=frame_splicing, + init_mode=init_mode, + ) + + +class ContextNetDecoderForCTC(TrainableNM): + """ + ContextNet Decoder creates the final layer in ContextNet that maps from the outputs + of ContextNet Encoder to the vocabulary of interest. + + Args: + feat_in (int): Number of channels being input to this module + num_classes (int): Number of characters in ASR model's vocab/labels. + This count should not include the CTC blank symbol. 
+ hidden_size (int): Number of units in the hidden state of the LSTM RNN. + init_mode (str): Describes how neural network parameters are + initialized. Options are ['xavier_uniform', 'xavier_normal', + 'kaiming_uniform','kaiming_normal']. + Defaults to "xavier_uniform". + """ + + @property + @add_port_docs() + def input_ports(self): + """Returns definitions of module input ports. + """ + return { + # "encoder_output": NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(EncodedRepresentationTag), 2: AxisType(ProcessedTimeTag),} + # ) + "encoder_output": NeuralType(('B', 'D', 'T'), AcousticEncodedRepresentation()) + } + + @property + @add_port_docs() + def output_ports(self): + """Returns definitions of module output ports. + """ + # return {"output": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag),})} + return {"output": NeuralType(('B', 'T', 'D'), LogprobsType())} + + def __init__(self, feat_in: int, num_classes: int, hidden_size: int = 640, init_mode: str = "xavier_uniform"): + super().__init__() + + self._feat_in = feat_in + # Add 1 for blank char + self._num_classes = num_classes + 1 + + self.rnn = nn.LSTM(feat_in, hidden_size, bias=True, batch_first=True) + self.clf = nn.Linear(hidden_size, self._num_classes) + self.clf.apply(lambda x: init_weights(x, mode=init_mode)) + self.to(self._device) + + def forward(self, encoder_output): + encoder_output = encoder_output.transpose(1, 2) # [B, T, D] + output, states = self.rnn(encoder_output) + logits = self.clf(output) + return F.log_softmax(logits, dim=-1) diff --git a/nemo/collections/asr/data_layer.py b/nemo/collections/asr/data_layer.py index e2b95c0e9604..dbaba86c3190 100644 --- a/nemo/collections/asr/data_layer.py +++ b/nemo/collections/asr/data_layer.py @@ -14,25 +14,178 @@ # ============================================================================= """This package contains Neural Modules responsible for ASR data layers.""" +import copy +import io +import os from functools import partial +from typing import Any, Dict, List, Optional, Union +import braceexpand import torch - -import nemo -from .parts.dataset import AudioDataset, KaldiFeatureDataset, TranscriptDataset, seq_collate_fn +import webdataset as wd + +from .parts.collections import ASRAudioText +from .parts.dataset import ( + AudioDataset, + AudioLabelDataset, + KaldiFeatureDataset, + TranscriptDataset, + fixed_seq_collate_fn, + seq_collate_fn, +) from .parts.features import WaveformFeaturizer +from .parts.parsers import make_parser +from .parts.perturb import AudioAugmentor, perturbation_types from nemo.backends.pytorch import DataLayerNM from nemo.core import DeviceType from nemo.core.neural_types import * +from nemo.utils import logging +from nemo.utils.decorators import add_port_docs from nemo.utils.misc import pad_to __all__ = [ 'AudioToTextDataLayer', + 'TarredAudioToTextDataLayer', 'KaldiFeatureDataLayer', 'TranscriptDataLayer', + 'AudioToSpeechLabelDataLayer', ] +def _process_augmentations(augmenter) -> AudioAugmentor: + """Process list of online data augmentations. + + Accepts either an AudioAugmentor object with pre-defined augmentations, + or a dictionary that points to augmentations that have been defined. + + If a dictionary is passed, must follow the below structure: + Dict[str, Dict[str, Any]]: Which refers to a dictionary of string + names for augmentations, defined in `asr/parts/perturb.py`. + + The inner dictionary may contain key-value arguments of the specific + augmentation, along with an essential key `prob`. 
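A minimal sketch instantiating the ContextNet encoder/decoder pair defined above. The single block below is purely illustrative; real ContextNet configs stack many more blocks with SE enabled, and in the released YAML configs `kernel`, `stride` and `dilation` are written as one-element lists:

```python
import nemo
import nemo.collections.asr as nemo_asr

nf = nemo.core.NeuralModuleFactory()

# One toy convolution block following the layout documented in the docstring above.
contextnet_blocks = [
    {
        'filters': 256, 'repeat': 1, 'kernel': [5], 'stride': [1], 'dilation': [1],
        'dropout': 0.0, 'residual': True, 'separable': True,
    },
]

encoder = nemo_asr.ContextNetEncoder(jasper=contextnet_blocks, activation='relu', feat_in=64)
# The decoder's feat_in matches the last block's filters; 28 characters plus the CTC blank.
decoder = nemo_asr.ContextNetDecoderForCTC(feat_in=256, num_classes=28, hidden_size=640)
```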
`prob` declares the + probability of the augmentation being applied, and must be a float + value in the range [0, 1]. + + # Example in YAML config file + + Augmentations are generally applied only during training, so we can add + these augmentations to our yaml config file, and modify the behaviour + for training and evaluation. + + ```yaml + AudioToSpeechLabelDataLayer: + ... # Parameters shared between train and evaluation time + train: + augmentor: + shift: + prob: 0.5 + min_shift_ms: -5.0 + max_shift_ms: 5.0 + white_noise: + prob: 1.0 + min_level: -90 + max_level: -46 + ... + eval: + ... + ``` + + Then in the training script, + + ```python + import copy + from ruamel.yaml import YAML + + yaml = YAML(typ="safe") + with open(model_config) as f: + params = yaml.load(f) + + # Train Config for Data Loader + train_dl_params = copy.deepcopy(params["AudioToTextDataLayer"]) + train_dl_params.update(params["AudioToTextDataLayer"]["train"]) + del train_dl_params["train"] + del train_dl_params["eval"] + + data_layer_train = nemo_asr.AudioToTextDataLayer( + ..., + **train_dl_params, + ) + + # Evaluation Config for Data Loader + eval_dl_params = copy.deepcopy(params["AudioToTextDataLayer"]) + eval_dl_params.update(params["AudioToTextDataLayer"]["eval"]) + del eval_dl_params["train"] + del eval_dl_params["eval"] + + data_layer_eval = nemo_asr.AudioToTextDataLayer( + ..., + **eval_dl_params, + ) + ``` + + # Registering your own Augmentations + + To register custom augmentations to obtain the above convenience of + the declaring the augmentations in YAML, you can put additional keys in + `perturbation_types` dictionary as follows. + + ```python + from nemo.collections.asr.parts import perturb + + # Define your own perturbation here + class CustomPerturbation(perturb.Perturbation): + ... + + perturb.register_perturbation(name_of_perturbation, CustomPerturbation) + ``` + + Args: + augmenter: AudioAugmentor object or + dictionary of str -> kwargs (dict) which is parsed and used + to initialize an AudioAugmentor. + Note: It is crucial that each individual augmentation has + a keyword `prob`, that defines a float probability in the + the range [0, 1] of this augmentation being applied. + If this keyword is not present, then the augmentation is + disabled and a warning is logged. + + Returns: AudioAugmentor object + """ + if isinstance(augmenter, AudioAugmentor): + return augmenter + + if not type(augmenter) == dict: + raise ValueError("Cannot parse augmenter. Must be a dict or an AudioAugmentor object ") + + augmenter = copy.deepcopy(augmenter) + + augmentations = [] + for augment_name, augment_kwargs in augmenter.items(): + prob = augment_kwargs.get('prob', None) + + if prob is None: + raise KeyError( + f'Augmentation "{augment_name}" will not be applied as ' + f'keyword argument "prob" was not defined for this augmentation.' + ) + + else: + _ = augment_kwargs.pop('prob') + + if prob < 0.0 or prob > 1.0: + raise ValueError("`prob` must be a float value between 0 and 1.") + + try: + augmentation = perturbation_types[augment_name](**augment_kwargs) + augmentations.append([prob, augmentation]) + except KeyError: + raise KeyError(f"Invalid perturbation name. Allowed values : {perturbation_types.keys()}") + + augmenter = AudioAugmentor(perturbations=augmentations) + return augmenter + + class AudioToTextDataLayer(DataLayerNM): """Data Layer for general ASR tasks. 
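Besides the YAML route shown in the docstring, the `augmentor` argument can be passed directly as a dictionary of perturbation names to keyword arguments, each carrying its own `prob`. A small sketch with illustrative values (the manifest path is a placeholder):

```python
import nemo
import nemo.collections.asr as nemo_asr

nf = nemo.core.NeuralModuleFactory()

augmentor = {
    'white_noise': {'prob': 1.0, 'min_level': -90, 'max_level': -46},
    'shift': {'prob': 0.5, 'min_shift_ms': -5.0, 'max_shift_ms': 5.0},
}

data_layer = nemo_asr.AudioToTextDataLayer(
    manifest_filepath='train_manifest.json',
    labels=list("abcdefghijklmnopqrstuvwxyz '"),
    batch_size=32,
    augmentor=augmentor,
)
```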
@@ -60,9 +213,15 @@ class AudioToTextDataLayer(DataLayerNM): int_values (bool): Bool indicating whether the audio file is saved as int data or float data. Defaults to False. + bos_id (id): Dataset parameter. + Beginning of string symbol id used for seq2seq models. + Defaults to None. eos_id (id): Dataset parameter. End of string symbol id used for seq2seq models. Defaults to None. + pad_id (id): Token used to pad when collating samples in batches. + If this is None, pads using 0s. + Defaults to None. min_duration (float): Dataset parameter. All training files which have a duration less than min_duration are dropped. Note: Duration is read from the manifest JSON. @@ -89,9 +248,18 @@ class AudioToTextDataLayer(DataLayerNM): num_workers (int): See PyTorch DataLoader. Defaults to 0. perturb_config (dict): Currently disabled. + augmentor (AudioAugmentor or dict): Optional AudioAugmentor or + dictionary of str -> kwargs (dict) which is parsed and used + to initialize an AudioAugmentor. + Note: It is crucial that each individual augmentation has + a keyword `prob`, that defines a float probability in the + the range [0, 1] of this augmentation being applied. + If this keyword is not present, then the augmentation is + disabled and a warning is logged. """ @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. """ @@ -100,7 +268,12 @@ def output_ports(self): # 'a_sig_length': NeuralType({0: AxisType(BatchTag)}), # 'transcripts': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # 'transcript_length': NeuralType({0: AxisType(BatchTag)}), - 'audio_signal': NeuralType(('B', 'T'), AudioSignal(freq=self._sample_rate)), + 'audio_signal': NeuralType( + ('B', 'T'), + AudioSignal(freq=self._sample_rate) + if self is not None and self._sample_rate is not None + else AudioSignal(), + ), 'a_sig_length': NeuralType(tuple('B'), LengthsType()), 'transcripts': NeuralType(('B', 'T'), LabelsType()), 'transcript_length': NeuralType(tuple('B'), LengthsType()), @@ -124,10 +297,17 @@ def __init__( drop_last=False, shuffle=True, num_workers=0, + augmentor: Optional[Union[AudioAugmentor, Dict[str, Dict[str, Any]]]] = None, ): super().__init__() self._sample_rate = sample_rate - self._featurizer = WaveformFeaturizer(sample_rate=self._sample_rate, int_values=int_values, augmentor=None) + + if augmentor is not None: + augmentor = _process_augmentations(augmentor) + + self._featurizer = WaveformFeaturizer( + sample_rate=self._sample_rate, int_values=int_values, augmentor=augmentor + ) # Set up dataset dataset_params = { @@ -143,14 +323,18 @@ def __init__( 'load_audio': load_audio, } self._dataset = AudioDataset(**dataset_params) + self._batch_size = batch_size # Set up data loader if self._placement == DeviceType.AllGpu: - nemo.logging.info("Parallelizing Datalayer.") + logging.info("Parallelizing Datalayer.") sampler = torch.utils.data.distributed.DistributedSampler(self._dataset) else: sampler = None + if batch_size == -1: + batch_size = len(self._dataset) + pad_id = 0 if pad_id is None else pad_id self._dataloader = torch.utils.data.DataLoader( dataset=self._dataset, @@ -174,6 +358,240 @@ def data_iterator(self): return self._dataloader +class TarredAudioToTextDataLayer(DataLayerNM): + """Data Layer for general ASR tasks, where the audio files are tarred. + + Module which reads ASR labeled data. It accepts a single comma-separated JSON manifest file, as well as the + path(s) to the tarball(s) with the wav files. 
Each line of the manifest should contain the information for one + audio file, including at least the transcript and name of the audio file (doesn't have to be exact, only the + basename must be the same). + + Valid formats for the audio_tar_filepaths argument include (1) a single string that can be brace-expanded, + e.g. 'path/to/audio.tar' or 'path/to/audio_{1..100}.tar.gz', or (2) a list of file paths that will not be + brace-expanded, e.g. ['audio_1.tar', 'audio_2.tar', ...]. See the WebDataset documentation for more information + about accepted data and input formats. + + If using torch.distributed, the number of shards should be divisible by the number of workers to ensure an + even split among workers. If it is not divisible, logging will give a warning but we will continue. + + Notice that a few arguments are different from the AudioToTextDataLayer; for example, shuffle (bool) has been + replaced by shuffle_n (int). + + Additionally, please note that the len() of this DataLayer is assumed to be the length of the manifest. Be aware + of this especially if the tarred audio is a subset of the samples represented in the manifest. + + Args: + audio_tar_filepaths: Either a list of audio tarball filepaths, or a + string (can be brace-expandable). + manifest_filepath (str): Path to the manifest. + labels (list): List of characters that can be output by the ASR model. + For Jasper, this is the 28 character set {a-z '}. The CTC blank + symbol is automatically added later for models using ctc. + batch_size (int): batch size + sample_rate (int): Target sampling rate for data. Audio files will be + resampled to sample_rate if it is not already. + Defaults to 16000. + int_values (bool): Bool indicating whether the audio file is saved as + int data or float data. + Defaults to False. + bos_id (id): Dataset parameter. + Beginning of string symbol id used for seq2seq models. + Defaults to None. + eos_id (id): Dataset parameter. + End of string symbol id used for seq2seq models. + Defaults to None. + pad_id (id): Token used to pad when collating samples in batches. + If this is None, pads using 0s. + Defaults to None. + min_duration (float): Dataset parameter. + All training files which have a duration less than min_duration + are dropped. Note: Duration is read from the manifest JSON. + Defaults to 0.1. + max_duration (float): Dataset parameter. + All training files which have a duration more than max_duration + are dropped. Note: Duration is read from the manifest JSON. + Defaults to None. + normalize_transcripts (bool): Dataset parameter. + Whether to use automatic text cleaning. + It is highly recommended to manually clean text for best results. + Defaults to True. + trim_silence (bool): Whether to use trim silence from beginning and end + of audio signal using librosa.effects.trim(). + Defaults to False. + shuffle_n (int): How many samples to look ahead and load to be shuffled. + See WebDataset documentation for more details. + Defaults to 0. + num_workers (int): See PyTorch DataLoader. Defaults to 0. + augmentor (AudioAugmentor or dict): Optional AudioAugmentor or + dictionary of str -> kwargs (dict) which is parsed and used + to initialize an AudioAugmentor. + Note: It is crucial that each individual augmentation has + a keyword `prob`, that defines a float probability in the + the range [0, 1] of this augmentation being applied. + If this keyword is not present, then the augmentation is + disabled and a warning is logged. 
+ """ + + @property + @add_port_docs() + def output_ports(self): + """Returns definitions of module output ports. + """ + return { + 'audio_signal': NeuralType( + ('B', 'T'), + AudioSignal(freq=self._sample_rate) + if self is not None and self._sample_rate is not None + else AudioSignal(), + ), + 'a_sig_length': NeuralType(tuple('B'), LengthsType()), + 'transcripts': NeuralType(('B', 'T'), LabelsType()), + 'transcript_length': NeuralType(tuple('B'), LengthsType()), + } + + def __init__( + self, + audio_tar_filepaths, + manifest_filepath, + labels, + batch_size, + sample_rate=16000, + int_values=False, + bos_id=None, + eos_id=None, + pad_id=None, + min_duration=0.1, + max_duration=None, + normalize_transcripts=True, + trim_silence=False, + shuffle_n=0, + num_workers=0, + augmentor: Optional[Union[AudioAugmentor, Dict[str, Dict[str, Any]]]] = None, + ): + super().__init__() + self._sample_rate = sample_rate + + if augmentor is not None: + augmentor = _process_augmentations(augmentor) + + self.collection = ASRAudioText( + manifests_files=manifest_filepath.split(','), + parser=make_parser(labels=labels, name='en', do_normalize=normalize_transcripts), + min_duration=min_duration, + max_duration=max_duration, + index_by_file_id=True, # Must set this so the manifest lines can be indexed by file ID + ) + + self.featurizer = WaveformFeaturizer(sample_rate=self._sample_rate, int_values=int_values, augmentor=augmentor) + + self.trim = trim_silence + self.eos_id = eos_id + self.bos_id = bos_id + + # Used in creating a sampler (in Actions). + self._batch_size = batch_size + self._num_workers = num_workers + pad_id = 0 if pad_id is None else pad_id + self.collate_fn = partial(seq_collate_fn, token_pad_value=pad_id) + + # Check for distributed and partition shards accordingly + if torch.distributed.is_available() and torch.distributed.is_initialized(): + global_rank = torch.distributed.get_rank() + world_size = torch.distributed.get_world_size() + + if isinstance(audio_tar_filepaths, str): + audio_tar_filepaths = list(braceexpand.braceexpand(audio_tar_filepaths)) + + if len(audio_tar_filepaths) % world_size != 0: + logging.warning( + f"Number of shards in tarred dataset ({len(audio_tar_filepaths)}) is not divisible " + f"by number of distributed workers ({world_size})." + ) + + begin_idx = (len(audio_tar_filepaths) // world_size) * global_rank + end_idx = begin_idx + (len(audio_tar_filepaths) // world_size) + audio_tar_filepaths = audio_tar_filepaths[begin_idx:end_idx] + + # Put together WebDataset + self._dataset = ( + wd.Dataset(audio_tar_filepaths) + .shuffle(shuffle_n) + .rename(audio='wav', key='__key__') + .to_tuple('audio', 'key') + .pipe(self._filter) + .map(f=self._build_sample) + ) + + def _filter(self, iterator): + """Used to remove samples that have been filtered out by ASRAudioText already. + Otherwise, we would get a KeyError as _build_sample attempts to find the manifest entry for a sample + that was filtered out (e.g. for duration). 
+ """ + + class TarredAudioFilter: + def __init__(self, collection): + self.iterator = iterator + self.collection = collection + + def __iter__(self): + return self + + def __next__(self): + while True: + audio_bytes, audio_filename = next(self.iterator) + file_id, _ = os.path.splitext(os.path.basename(audio_filename)) + if file_id in self.collection.mapping: + return audio_bytes, audio_filename + + return TarredAudioFilter(self.collection) + + def _build_sample(self, tup): + """Builds the training sample by combining the data from the WebDataset with the manifest info. + """ + audio_bytes, audio_filename = tup + + # Grab manifest entry from self.collection + file_id, _ = os.path.splitext(os.path.basename(audio_filename)) + manifest_idx = self.collection.mapping[file_id] + manifest_entry = self.collection[manifest_idx] + + offset = manifest_entry.offset + if offset is None: + offset = 0 + + # Convert audio bytes to IO stream for processing (for SoundFile to read) + audio_filestream = io.BytesIO(audio_bytes) + features = self.featurizer.process( + audio_filestream, offset=offset, duration=manifest_entry.duration, trim=self.trim, + ) + audio_filestream.close() + + # Audio features + f, fl = features, torch.tensor(features.shape[0]).long() + + # Text features + t, tl = manifest_entry.text_tokens, len(manifest_entry.text_tokens) + if self.bos_id is not None: + t = [self.bos_id] + t + tl += 1 + if self.eos_id is not None: + t = t + [self.eos_id] + tl += 1 + + return f, fl, torch.tensor(t).long(), torch.tensor(tl).long() + + def __len__(self): + return len(self.collection) + + @property + def dataset(self): + return self._dataset + + @property + def data_iterator(self): + return None + + class KaldiFeatureDataLayer(DataLayerNM): """Data layer for reading generic Kaldi-formatted data. @@ -209,6 +627,7 @@ class KaldiFeatureDataLayer(DataLayerNM): """ @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. @@ -252,7 +671,7 @@ def __init__( # Set up data loader if self._placement == DeviceType.AllGpu: - nemo.logging.info("Parallelizing DATALAYER") + logging.info("Parallelizing DATALAYER") sampler = torch.utils.data.distributed.DistributedSampler(self._dataset) else: sampler = None @@ -335,6 +754,7 @@ class TranscriptDataLayer(DataLayerNM): """ @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. @@ -350,7 +770,7 @@ def output_ports(self): return { # 'texts': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # 'texts_length': NeuralType({0: AxisType(BatchTag)}), - 'texts': NeuralType(('B', 'T'), ChannelType()), + 'texts': NeuralType(('B', 'T'), LabelsType()), 'texts_length': NeuralType(tuple('B'), LengthsType()), } @@ -425,3 +845,149 @@ def dataset(self): @property def data_iterator(self): return self._dataloader + + +# Ported from https://github.com/NVIDIA/OpenSeq2Seq/blob/master/open_seq2seq/data/speech2text/speech_commands.py +class AudioToSpeechLabelDataLayer(DataLayerNM): + """Data Layer for general speech classification. + + Module which reads speech recognition with target label. It accepts comma-separated + JSON manifest files describing the correspondence between wav audio files + and their target labels. JSON files should be of the following format:: + + {"audio_filepath": path_to_wav_0, "duration": time_in_sec_0, "label": \ +target_label_0, "offset": offset_in_sec_0} + ... 
+ {"audio_filepath": path_to_wav_n, "duration": time_in_sec_n, "label": \ +target_label_n, "offset": offset_in_sec_n} + + Args: + manifest_filepath (str): Dataset parameter. + Path to JSON containing data. + labels (list): Dataset parameter. + List of target classes that can be output by the speech recognition model. + batch_size (int): batch size + sample_rate (int): Target sampling rate for data. Audio files will be + resampled to sample_rate if it is not already. + Defaults to 16000. + int_values (bool): Bool indicating whether the audio file is saved as + int data or float data. + Defaults to False. + min_duration (float): Dataset parameter. + All training files which have a duration less than min_duration + are dropped. Note: Duration is read from the manifest JSON. + Defaults to 0.1. + max_duration (float): Dataset parameter. + All training files which have a duration more than max_duration + are dropped. Note: Duration is read from the manifest JSON. + Defaults to None. + trim_silence (bool): Whether to use trim silence from beginning and end + of audio signal using librosa.effects.trim(). + Defaults to False. + load_audio (bool): Dataset parameter. + Controls whether the dataloader loads the audio signal and + transcript or just the transcript. + Defaults to True. + drop_last (bool): See PyTorch DataLoader. + Defaults to False. + shuffle (bool): See PyTorch DataLoader. + Defaults to True. + num_workers (int): See PyTorch DataLoader. + Defaults to 0. + augmenter (AudioAugmentor or dict): Optional AudioAugmentor or + dictionary of str -> kwargs (dict) which is parsed and used + to initialize an AudioAugmentor. + Note: It is crucial that each individual augmentation has + a keyword `prob`, that defines a float probability in the + the range [0, 1] of this augmentation being applied. + If this keyword is not present, then the augmentation is + disabled and a warning is logged. + time_length (int): max seconds to consider in a batch # Pass this only for speaker recognition task + """ + + @property + def output_ports(self): + """Returns definitions of module output ports. 
+ """ + return { + 'audio_signal': NeuralType(('B', 'T'), AudioSignal(freq=self._sample_rate)), + 'a_sig_length': NeuralType(tuple('B'), LengthsType()), + 'label': NeuralType(tuple('B'), LabelsType()), + 'label_length': NeuralType(tuple('B'), LengthsType()), + } + + def __init__( + self, + *, + manifest_filepath: str, + labels: List[str], + batch_size: int, + sample_rate: int = 16000, + int_values: bool = False, + num_workers: int = 0, + shuffle: bool = True, + min_duration: Optional[float] = 0.1, + max_duration: Optional[float] = None, + trim_silence: bool = False, + drop_last: bool = False, + load_audio: bool = True, + augmentor: Optional[Union[AudioAugmentor, Dict[str, Dict[str, Any]]]] = None, + time_length: int = 0, + ): + super(AudioToSpeechLabelDataLayer, self).__init__() + + self._manifest_filepath = manifest_filepath + self._labels = labels + self._sample_rate = sample_rate + + if augmentor is not None: + augmentor = _process_augmentations(augmentor) + + self._featurizer = WaveformFeaturizer(sample_rate=sample_rate, int_values=int_values, augmentor=augmentor) + + dataset_params = { + 'manifest_filepath': manifest_filepath, + 'labels': labels, + 'featurizer': self._featurizer, + 'max_duration': max_duration, + 'min_duration': min_duration, + 'trim': trim_silence, + 'load_audio': load_audio, + } + self._dataset = AudioLabelDataset(**dataset_params) + + self.num_classes = self._dataset.num_commands + logging.info("# of classes :{}".format(self.num_classes)) + self.labels = self._dataset.labels + # Set up data loader + if self._placement == DeviceType.AllGpu: + logging.info("Parallelizing Datalayer.") + sampler = torch.utils.data.distributed.DistributedSampler(self._dataset) + else: + sampler = None + + if time_length: + collate_func = partial(fixed_seq_collate_fn, fixed_length=time_length * self._sample_rate) + else: + collate_func = partial(seq_collate_fn, token_pad_value=0) + + self._dataloader = torch.utils.data.DataLoader( + dataset=self._dataset, + batch_size=batch_size, + collate_fn=collate_func, + drop_last=drop_last, + shuffle=shuffle if sampler is None else False, + sampler=sampler, + num_workers=num_workers, + ) + + def __len__(self): + return len(self._dataset) + + @property + def dataset(self): + return None + + @property + def data_iterator(self): + return self._dataloader diff --git a/nemo/collections/asr/greedy_ctc_decoder.py b/nemo/collections/asr/greedy_ctc_decoder.py index 2d49011e7235..c4a264f10832 100644 --- a/nemo/collections/asr/greedy_ctc_decoder.py +++ b/nemo/collections/asr/greedy_ctc_decoder.py @@ -1,33 +1,50 @@ -# Copyright (c) 2019 NVIDIA Corporation -import torch +# -*- coding: utf-8 -*- -from nemo.backends.pytorch.nm import TrainableNM -from nemo.core.neural_types import * +# ============================================================================= +# Copyright (c) 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================= +from nemo.backends.pytorch.nm import NonTrainableNM +from nemo.core.neural_types import LogprobsType, NeuralType, PredictionsType +from nemo.utils.decorators import add_port_docs -class GreedyCTCDecoder(TrainableNM): + +class GreedyCTCDecoder(NonTrainableNM): """ Greedy decoder that computes the argmax over a softmax distribution """ @property + @add_port_docs() def input_ports(self): - """Returns definitions of module input ports. + """Returns: + Definitions of module input ports. """ - # return {"log_probs": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag),})} return {"log_probs": NeuralType(('B', 'T', 'D'), LogprobsType())} @property + @add_port_docs() def output_ports(self): - """Returns definitions of module output ports. + """Returns: + Definitions of module output ports. """ - # return {"predictions": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)})} return {"predictions": NeuralType(('B', 'T'), PredictionsType())} def __init__(self): super().__init__() def forward(self, log_probs): - with torch.no_grad(): - argmx = log_probs.argmax(dim=-1, keepdim=False) - return argmx + argmx = log_probs.argmax(dim=-1, keepdim=False) + return argmx diff --git a/nemo/collections/asr/helpers.py b/nemo/collections/asr/helpers.py index 3c920c58dda9..dd36cd412e1e 100644 --- a/nemo/collections/asr/helpers.py +++ b/nemo/collections/asr/helpers.py @@ -2,8 +2,8 @@ import torch -import nemo -from .metrics import word_error_rate +from .metrics import classification_accuracy, word_error_rate +from nemo.utils import logging def __ctc_decoder_predictions_tensor(tensor, labels): @@ -67,10 +67,45 @@ def monitor_asr_train_progress(tensors: list, labels: list, eval_metric='WER', t wer = word_error_rate(hypotheses, references, use_cer=use_cer) if tb_logger is not None: tb_logger.add_scalar(tag, wer) - nemo.logging.info(f'Loss: {tensors[0]}') - nemo.logging.info(f'{tag}: {wer * 100 : 5.2f}%') - nemo.logging.info(f'Prediction: {hypotheses[0]}') - nemo.logging.info(f'Reference: {references[0]}') + logging.info(f'Loss: {tensors[0]}') + logging.info(f'{tag}: {wer * 100 : 5.2f}%') + logging.info(f'Prediction: {hypotheses[0]}') + logging.info(f'Reference: {references[0]}') + + +def monitor_classification_training_progress(tensors: list, eval_metric=None, tb_logger=None): + """ + Computes the top k classification accuracy of the model being trained. + Prints sample to screen, computes and and logs a list of top k accuracies + to console and (optionally) Tensorboard + Args: + tensors: A list of 3 tensors (loss, logits, targets) + eval_metric: An optional list of integers detailing Top@`k` + in the range [1, max_classes]. Defaults to [1] if not set. 
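# A small, self-contained sketch of what greedy CTC decoding amounts to: the module
# above only takes the per-timestep argmax; collapsing repeats and dropping the blank
# is done later by the helpers (e.g. __ctc_decoder_predictions_tensor). The labels and
# shapes below are invented for illustration.
import torch

def greedy_ctc_transcript(log_probs: torch.Tensor, labels: list) -> str:
    # log_probs: [T, num_classes + 1]; the blank symbol is assumed to be the last index,
    # matching the "+1 for blank char" convention in JasperDecoderForCTC.
    blank_id = len(labels)
    predictions = log_probs.argmax(dim=-1).tolist()
    decoded, previous = [], blank_id
    for p in predictions:
        if p != previous and p != blank_id:
            decoded.append(labels[p])
        previous = p
    return ''.join(decoded)

print(greedy_ctc_transcript(torch.randn(5, 4).log_softmax(dim=-1), labels=['a', 'b', 'c']))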
+ tb_logger: Tensorboard logging object + Returns: + None + """ + if eval_metric is None: + eval_metric = [1] + + if type(eval_metric) not in (list, tuple): + eval_metric = [eval_metric] + + top_k = eval_metric + + with torch.no_grad(): + logits, targets = tensors[1:] + topk_acc = classification_accuracy(logits, targets, top_k=top_k) + + tag = 'training_batch_top@{0}' + logging.info(f'Loss: {tensors[0]}') + + for k, acc in zip(top_k, topk_acc): + if tb_logger is not None: + tb_logger.add_scalar(tag.format(k), acc) + + logging.info(f"{tag.format(k)}: {acc * 100.: 3.4f}") def __gather_losses(losses_list: list) -> list: @@ -145,12 +180,12 @@ def process_evaluation_epoch(global_vars: dict, eval_metric='WER', tag=None): wer = word_error_rate(hypotheses=hypotheses, references=references, use_cer=use_cer) if tag is None: - nemo.logging.info(f"==========>>>>>>Evaluation Loss: {eloss}") - nemo.logging.info(f"==========>>>>>>Evaluation {eval_metric}: " f"{wer * 100 : 5.2f}%") + logging.info(f"==========>>>>>>Evaluation Loss: {eloss}") + logging.info(f"==========>>>>>>Evaluation {eval_metric}: " f"{wer * 100 : 5.2f}%") return {"Evaluation_Loss": eloss, f"Evaluation_{eval_metric}": wer} else: - nemo.logging.info(f"==========>>>>>>Evaluation Loss {tag}: {eloss}") - nemo.logging.info(f"==========>>>>>>Evaluation {eval_metric} {tag}: " f"{wer * 100 : 5.2f}%") + logging.info(f"==========>>>>>>Evaluation Loss {tag}: {eloss}") + logging.info(f"==========>>>>>>Evaluation {eval_metric} {tag}: " f"{wer * 100 : 5.2f}%") return { f"Evaluation_Loss_{tag}": eloss, f"Evaluation_{eval_metric}_{tag}": wer, @@ -163,3 +198,79 @@ def post_process_predictions(predictions, labels): def post_process_transcripts(transcript_list, transcript_len_list, labels): return __gather_transcripts(transcript_list, transcript_len_list, labels=labels) + + +def process_classification_evaluation_batch(tensors: dict, global_vars: dict, top_k: list = 1): + """ + Creates a dictionary holding the results from a batch of samples + """ + if 'EvalLoss' not in global_vars.keys(): + global_vars['EvalLoss'] = [] + if 'batchsize' not in global_vars.keys(): + global_vars['batchsize'] = [] + + if isinstance(top_k, int): + top_k = [top_k] + + top_k = sorted(top_k) + + for k in top_k: + if f'CorrectCount@{k}' not in global_vars.keys(): + global_vars[f'CorrectCount@{k}'] = [] + + logits = None + labels = None + + for kv, v in tensors.items(): + if kv.startswith('loss'): + global_vars['EvalLoss'] += __gather_losses(v) + elif kv.startswith('logits'): + logits = torch.cat(v, 0) # if len(v) > 1 else v + elif kv.startswith('label'): + labels = torch.cat(v, 0) # if len(v) > 1 else v + + batch_size = labels.size(0) + global_vars['batchsize'] += [batch_size] + + with torch.no_grad(): + topk_acc = classification_accuracy(logits, labels, top_k=top_k) + + for k, acc in zip(top_k, topk_acc): + # Accuracy is provided as a percentage, we require the count of correct samples + # Therefore multiply by batch size to get count of correctly predicted samples + global_vars[f'CorrectCount@{k}'] += [acc * batch_size] + + +def process_classification_evaluation_epoch(global_vars: dict, eval_metric=None, tag=None): + """ + Calculates the aggregated loss and WER across the entire evaluation dataset + """ + if eval_metric is None: + eval_metric = [1] + + if type(eval_metric) not in (list, tuple): + eval_metric = [eval_metric] + + top_k = eval_metric + + eloss = torch.mean(torch.stack(global_vars['EvalLoss'])).item() + batch_sizes = global_vars['batchsize'] + total_num_samples = 
torch.tensor(batch_sizes).sum().double() + + topk_accs = [] + for k in top_k: + correct_counts = torch.tensor(global_vars[f'CorrectCount@{k}']) + topk_acc = correct_counts.sum().double() / total_num_samples + topk_accs.append(topk_acc) + + if tag is None: + tag = '' + + logs = {f"Evaluation_Loss {tag}": eloss} + + logging.info(f"==========>>>>>>Evaluation Loss {tag}: {eloss:.3f}") + for k, acc in zip(top_k, topk_accs): + logging.info(f"==========>>>>>>Evaluation Accuracy Top@{k} {tag}: {acc * 100.:3.4f}") + logs[f'Evaluation_Accuracy_Top@{k} {tag}'] = acc * 100.0 + + return logs diff --git a/nemo/collections/asr/jasper.py b/nemo/collections/asr/jasper.py index d6fcf7e38259..a259c5cbd533 100644 --- a/nemo/collections/asr/jasper.py +++ b/nemo/collections/asr/jasper.py @@ -5,9 +5,11 @@ import torch.nn as nn import torch.nn.functional as F -from .parts.jasper import JasperBlock, init_weights, jasper_activations +from .parts.jasper import JasperBlock, StatsPoolLayer, init_weights, jasper_activations from nemo.backends.pytorch.nm import TrainableNM from nemo.core.neural_types import * +from nemo.utils import logging +from nemo.utils.decorators import add_port_docs class JasperEncoder(TrainableNM): @@ -44,16 +46,46 @@ class JasperEncoder(TrainableNM): 'tied' (bool) # Whether to use the same weights for all # sub-blocks. # Defaults to False + 'se' (bool) # Whether to add Squeeze and Excitation + # sub-blocks. + # Defaults to False + 'se_reduction_ratio' (int) # The reduction ratio of the Squeeze + # sub-module. + # Must be an integer > 1. + # Defaults to 8. + 'se_context_window' (int) # The size of the temporal context + # provided to SE sub-module. + # Must be an integer. If value <= 0, will perform global + # temporal pooling (global context). + # If value >= 1, will perform stride 1 average pooling to + # compute context window. + 'se_interpolation_mode' (str) # Interpolation mode of timestep dimension. + # Used only if context window is > 1. + # The modes available for resizing are: `nearest`, `linear` (3D-only), + # `bilinear`, `area` + 'kernel_size_factor' (float) # Conv kernel size multiplier + # Can be either an int or float + # Kernel size is recomputed as below: + # new_kernel_size = int(max(1, (kernel_size * kernel_width))) + # to prevent kernel sizes than 1. + # Note: If rescaled kernel size is an even integer, + # adds 1 to the rescaled kernel size to allow "same" + # padding. + 'stride_last' (bool) # Bool flag to determine whether each + # of the the repeated sub-blockss will perform a stride, + # or only the last sub-block will perform a strided convolution. } activation (str): Activation function used for each sub-blocks. Can be - one of ["hardtanh", "relu", "selu"]. + one of ["hardtanh", "relu", "selu", "swish"]. feat_in (int): Number of channels being input to this module normalization_mode (str): Normalization to be used in each sub-block. Can be one of ["batch", "layer", "instance", "group"] Defaults to "batch". residual_mode (str): Type of residual connection. - Can be "add" or "max". + Can be "add", "stride_add" or "max". + "stride_add" mode performs strided convolution prior to residual + addition. Defaults to "add". norm_groups (int): Number of groups for "group" normalization type. If set to -1, number of channels is used. @@ -71,6 +103,7 @@ class JasperEncoder(TrainableNM): length: Optional[torch.Tensor] @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. 
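# Hypothetical per-block entry of a Jasper/QuartzNet config, written as a Python dict,
# to illustrate the new options read via lcfg.get(...) in JasperEncoder above. All key
# names and values are assumptions for illustration; omitted keys fall back to the
# defaults shown in the code (se_reduction_ratio=8, se_context_window=-1,
# se_interpolation_mode='nearest', kernel_size_factor=1.0, stride_last=False).
example_block_cfg = {
    'filters': 256,
    'repeat': 5,
    'kernel': [33],
    'stride': [2],
    'dilation': [1],
    'dropout': 0.0,
    'residual': True,
    'separable': True,
    # options introduced in this change set
    'se': True,                      # append a Squeeze-and-Excitation sub-block
    'se_reduction_ratio': 8,
    'se_context_window': -1,         # <= 0 means global temporal context
    'kernel_size_factor': 0.5,       # 33 -> int(max(1, 33 * 0.5)) = 16, bumped to 17 to stay odd
    'stride_last': True,             # only the last repeated sub-block performs the stride
    'residual_mode': 'stride_add',   # strided 1x1 conv on the residual branch before addition
}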
""" @@ -84,6 +117,7 @@ def input_ports(self): } @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. """ @@ -96,6 +130,25 @@ def output_ports(self): "encoded_lengths": NeuralType(tuple('B'), LengthsType()), } + @property + def _disabled_deployment_input_ports(self): + return set(["length"]) + + @property + def _disabled_deployment_output_ports(self): + return set(["encoded_lengths"]) + + def _prepare_for_deployment(self): + m_count = 0 + for m in self.modules(): + if type(m).__name__ == "MaskedConv1d": + m.use_mask = False + m_count += 1 + logging.warning(f"Turned off {m_count} masked convolutions") + + input_example = torch.randn(16, self.__feat_in, 256) + return input_example, None + def __init__( self, jasper, @@ -113,6 +166,8 @@ def __init__( activation = jasper_activations[activation]() feat_in = feat_in * frame_splicing + self.__feat_in = feat_in + residual_panes = [] encoder_layers = [] self.dense_residual = False @@ -125,6 +180,13 @@ def __init__( groups = lcfg.get('groups', 1) separable = lcfg.get('separable', False) heads = lcfg.get('heads', -1) + residual_mode = lcfg.get('residual_mode', residual_mode) + se = lcfg.get('se', False) + se_reduction_ratio = lcfg.get('se_reduction_ratio', 8) + se_context_window = lcfg.get('se_context_window', -1) + se_interpolation_mode = lcfg.get('se_interpolation_mode', 'nearest') + kernel_size_factor = lcfg.get('kernel_size_factor', 1.0) + stride_last = lcfg.get('stride_last', False) encoder_layers.append( JasperBlock( feat_in, @@ -144,6 +206,12 @@ def __init__( activation=activation, residual_panes=dense_res, conv_mask=conv_mask, + se=se, + se_reduction_ratio=se_reduction_ratio, + se_context_window=se_context_window, + se_interpolation_mode=se_interpolation_mode, + kernel_size_factor=kernel_size_factor, + stride_last=stride_last, ) ) feat_in = lcfg['filters'] @@ -177,6 +245,7 @@ class JasperDecoderForCTC(TrainableNM): """ @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. """ @@ -188,15 +257,22 @@ def input_ports(self): } @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. """ # return {"output": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag),})} return {"output": NeuralType(('B', 'T', 'D'), LogprobsType())} - def __init__(self, feat_in, num_classes, init_mode="xavier_uniform"): - super().__init__() + def __init__(self, feat_in, num_classes, init_mode="xavier_uniform", vocabulary=None): + if vocabulary is not None: + if num_classes != len(vocabulary): + raise ValueError( + f"If vocabulary is specified, it's length should be equal to the num_classes. But I got: num_classes={num_classes} and len(vocabluary)={len(vocabulary)}" + ) + self.__vocabulary = vocabulary + super().__init__() self._feat_in = feat_in # Add 1 for blank char self._num_classes = num_classes + 1 @@ -207,3 +283,261 @@ def __init__(self, feat_in, num_classes, init_mode="xavier_uniform"): def forward(self, encoder_output): return F.log_softmax(self.decoder_layers(encoder_output).transpose(1, 2), dim=-1) + + def _prepare_for_deployment(self): + input_example = torch.randn(34, self._feat_in, 1) + return input_example, None + + @property + def vocabulary(self): + return self.__vocabulary + + +class JasperDecoderForClassification(TrainableNM): + """ + Jasper Decoder creates the final layer in Jasper that maps from the outputs + of Jasper Encoder to one class label. 
+ + Args: + feat_in (int): Number of channels being input to this module + num_classes (int): Number of characters in ASR model's vocab/labels. + This count should not include the CTC blank symbol. + init_mode (str): Describes how neural network parameters are + initialized. Options are ['xavier_uniform', 'xavier_normal', + 'kaiming_uniform','kaiming_normal']. + Defaults to "xavier_uniform". + """ + + @property + def input_ports(self): + """Returns definitions of module input ports. + """ + return { + # "encoder_output": NeuralType( + # {0: AxisType(BatchTag), 1: AxisType(EncodedRepresentationTag), 2: AxisType(ProcessedTimeTag)} + # ) + "encoder_output": NeuralType(('B', 'D', 'T'), AcousticEncodedRepresentation()) + } + + @property + def output_ports(self): + """Returns definitions of module output ports. + """ + # return {"logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)})} + return {"logits": NeuralType(('B', 'D'), LogitsType())} + + def __init__( + self, *, feat_in, num_classes, init_mode="xavier_uniform", return_logits=True, pooling_type='avg', **kwargs + ): + TrainableNM.__init__(self, **kwargs) + + self._feat_in = feat_in + self._return_logits = return_logits + self._num_classes = num_classes + + if pooling_type == 'avg': + self.pooling = nn.AdaptiveAvgPool1d(1) + elif pooling_type == 'max': + self.pooling = nn.AdaptiveMaxPool1d(1) + else: + raise ValueError('Pooling type chosen is not valid. Must be either `avg` or `max`') + + self.decoder_layers = nn.Sequential(nn.Linear(self._feat_in, self._num_classes, bias=True)) + self.apply(lambda x: init_weights(x, mode=init_mode)) + self.to(self._device) + + def forward(self, encoder_output): + batch, in_channels, timesteps = encoder_output.size() + + encoder_output = self.pooling(encoder_output).view(batch, in_channels) # [B, C] + logits = self.decoder_layers(encoder_output) # [B, num_classes] + + if self._return_logits: + return logits + + return F.softmax(logits, dim=-1) + + +class JasperDecoderForSpkrClass(TrainableNM): + """ + Jasper Decoder creates the final layer in Jasper that maps from the outputs + of Jasper Encoder to the embedding layer followed by speaker based softmax loss. + + Args: + feat_in (int): Number of channels being input to this module + num_classes (int): Number of unique speakers in dataset + emb_sizes (list) : shapes of intermediate embedding layers (we consider speaker embbeddings from 1st of this layers) + Defaults to [1024,1024] + pool_mode (str) : Pooling stratergy type. options are 'gram','xvector','superVector'. + Defaults to 'xvector' + init_mode (str): Describes how neural network parameters are + initialized. Options are ['xavier_uniform', 'xavier_normal', + 'kaiming_uniform','kaiming_normal']. + Defaults to "xavier_uniform". + """ + + @property + def input_ports(self): + """Returns definitions of module input ports. + + encoder_output: + 0: AxisType(BatchTag) + + 1: AxisType(EncodedRepresentationTag) + + 2: AxisType(ProcessedTimeTag) + """ + + return {"encoder_output": NeuralType(('B', 'D', 'T'), AcousticEncodedRepresentation())} + + @property + def output_ports(self): + """Returns definitions of module output ports. 
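# Shape walk-through of JasperDecoderForClassification above, with invented sizes:
# the [B, C, T] encoder output is pooled over time to [B, C] and projected to
# [B, num_classes] by a single Linear layer.
import torch
import torch.nn as nn

batch, channels, timesteps, num_classes = 4, 1024, 128, 30
encoder_output = torch.randn(batch, channels, timesteps)

pooling = nn.AdaptiveAvgPool1d(1)           # the 'avg' pooling_type branch
decoder = nn.Linear(channels, num_classes)

pooled = pooling(encoder_output).view(batch, channels)  # [4, 1024]
logits = decoder(pooled)                                 # [4, 30]
print(pooled.shape, logits.shape)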
+ + logits: + 0: AxisType(BatchTag) + + 1: AxisType(ChannelTag) + + embs: + 0: AxisType(BatchTag) + 1: AxisType(EncodedRepresentationTah) + """ + return { + "logits": NeuralType(('B', 'D'), LogitsType()), + "embs": NeuralType(('B', 'D'), AcousticEncodedRepresentation()), + } + + def __init__(self, feat_in, num_classes, emb_sizes=[1024, 1024], pool_mode='xvector', init_mode="xavier_uniform"): + TrainableNM.__init__(self) + self._feat_in = 0 + if pool_mode == 'gram': + gram = True + super_vector = False + elif pool_mode == 'superVector': + gram = True + super_vector = True + else: + gram = False + super_vector = False + + if gram: + self._feat_in += feat_in ** 2 + else: + self._feat_in += 2 * feat_in + + if super_vector and gram: + self._feat_in += 2 * feat_in + + self._midEmbd1 = int(emb_sizes[0]) # Spkr Vector Embedding Shape + self._midEmbd2 = int(emb_sizes[1]) if len(emb_sizes) > 1 else 0 # Spkr Vector Embedding Shape + + self._num_classes = num_classes + self._pooling = StatsPoolLayer(gram=gram, super_vector=super_vector) + + self.mid1 = self.affineLayer(self._feat_in, self._midEmbd1, learn_mean=False) + self.mid2 = self.affineLayer(self._midEmbd1, self._midEmbd2, learn_mean=False) + self.final = nn.Linear(self._midEmbd2, self._num_classes) + + self.apply(lambda x: init_weights(x, mode=init_mode)) + self.to(self._device) + + def affineLayer(self, inp_shape, out_shape, learn_mean=True): + layer = nn.Sequential( + nn.Linear(inp_shape, out_shape), + nn.BatchNorm1d(out_shape, affine=learn_mean, track_running_stats=True), + nn.ReLU(), + ) + + return layer # layer, embs + + def forward(self, encoder_output): + # encoder_output = self.norm(encoder_output) + pool = self._pooling(encoder_output) + mid1, emb1 = self.mid1(pool), self.mid1[:2](pool) + mid2, embs = self.mid2(mid1), self.mid2[:2](mid1) + out = self.final(mid2) + + return out, emb1 + + +# Siamese Network, support to be added in future releases +# class SiameseDecoderForSpeakerClass(TrainableNM): +# """ +# Jasper Decoder creates the final layer in Jasper that maps from the outputs +# of Jasper Encoder to the vocabulary of interest. + +# Args: +# feat_in (int): Number of channels being input to this module +# num_classes (int): Number of characters in ASR model's vocab/labels. +# This count should not include the CTC blank symbol. +# init_mode (str): Describes how neural network parameters are +# initialized. Options are ['xavier_uniform', 'xavier_normal', +# 'kaiming_uniform','kaiming_normal']. +# Defaults to "xavier_uniform". +# """ + +# @property +# def input_ports(self): +# """Returns definitions of module input ports. + +# encoder_output: +# 0: AxisType(BatchTag) + +# 1: AxisType(EncodedRepresentationTag) + +# 2: AxisType(ProcessedTimeTag) +# """ +# return { +# "embs1": NeuralType(('B', 'D'), AcousticEncodedRepresentation()), +# "embs2": NeuralType(('B', 'D'), AcousticEncodedRepresentation()), +# } + +# @property +# def output_ports(self): +# """Returns definitions of module output ports. 
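# The statistics pooling in JasperDecoderForSpkrClass changes the width fed into the
# first affine layer depending on pool_mode; this helper just restates that arithmetic
# for a given encoder width (the widths used below are arbitrary examples).
def pooled_feat_in(feat_in: int, pool_mode: str = 'xvector') -> int:
    if pool_mode == 'gram':
        return feat_in ** 2                # flattened Gram (covariance) matrix
    if pool_mode == 'superVector':
        return feat_in ** 2 + 2 * feat_in  # Gram matrix plus mean/std statistics
    return 2 * feat_in                     # 'xvector': mean and std concatenated over time

print(pooled_feat_in(1500, 'xvector'))      # 3000
print(pooled_feat_in(1500, 'superVector'))  # 2253000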
+ +# output: +# 0: AxisType(BatchTag) + +# 1: AxisType(ChannelTag) +# """ +# return { +# "logits": NeuralType(('B', 'D'), LogitsType()), +# } + +# def __init__(self, emb_size, mid_dim, init_mode="xavier_uniform"): +# super().__init__() +# self._feat_in = emb_size +# self._mid_dim = mid_dim + +# self.connect = self.affineLayer(self._feat_in, self._mid_dim, learn_mean=True) + +# self.S = nn.Parameter(torch.randn(self._mid_dim, self._mid_dim), requires_grad=True) +# self.b = nn.Parameter(torch.randn(1), requires_grad=True) + +# self.apply(lambda x: init_weights(x, mode=init_mode)) +# self.to(self._device) + +# def affineLayer(self, inp_shape, out_shape, learn_mean=True): +# layer = nn.Sequential( +# nn.Linear(inp_shape, out_shape), +# nn.BatchNorm1d(out_shape, affine=learn_mean, track_running_stats=True), +# nn.ReLU(), +# ) + +# return layer # layer, embs + +# def forward(self, inp_emb1, inp_emb2): + +# x = self.connect(inp_emb1) +# y = self.connect(inp_emb2) + +# out = ( +# torch.matmul(x, y.T).diag() +# - torch.matmul(torch.matmul(x, self.S), x.T).diag() +# - torch.matmul(torch.matmul(y, self.S), y.T).diag() +# + self.b +# ) + +# return out diff --git a/nemo/collections/asr/las/helpers.py b/nemo/collections/asr/las/helpers.py index fde132321b50..baa44e48075b 100644 --- a/nemo/collections/asr/las/helpers.py +++ b/nemo/collections/asr/las/helpers.py @@ -3,9 +3,9 @@ import torch -import nemo from nemo.backends.pytorch.common.metrics import char_lm_metrics from nemo.collections.asr.metrics import word_error_rate +from nemo.utils import logging ENG_MWN = 5.3 @@ -68,14 +68,14 @@ def process_evaluation_epoch( transcript_texts = list(chain(*global_vars['transcript_texts'])) prediction_texts = list(chain(*global_vars['prediction_texts'])) - nemo.logging.info(f'Ten examples (transcripts and predictions)') - nemo.logging.info(transcript_texts[:10]) - nemo.logging.info(prediction_texts[:10]) + logging.info(f'Ten examples (transcripts and predictions)') + logging.info(transcript_texts[:10]) + logging.info(prediction_texts[:10]) wer = word_error_rate(hypotheses=prediction_texts, references=transcript_texts) return_dict[f'metric/{mode}_wer_{tag}'] = wer - nemo.logging.info(pformat(return_dict)) + logging.info(pformat(return_dict)) return return_dict diff --git a/nemo/collections/asr/las/misc.py b/nemo/collections/asr/las/misc.py index 56519e143fd8..b977b81218c3 100644 --- a/nemo/collections/asr/las/misc.py +++ b/nemo/collections/asr/las/misc.py @@ -5,6 +5,7 @@ from nemo.backends.pytorch.nm import TrainableNM from nemo.collections.asr.jasper import init_weights as jasper_init_weights from nemo.core.neural_types import * +from nemo.utils.decorators import add_port_docs class JasperRNNConnector(TrainableNM): @@ -18,6 +19,7 @@ class JasperRNNConnector(TrainableNM): """ @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. """ @@ -25,6 +27,7 @@ def input_ports(self): return {'tensor': NeuralType(('B', 'D', 'T'), ChannelType())} @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. 
diff --git a/nemo/collections/asr/losses.py b/nemo/collections/asr/losses.py index 909a16d6f39c..f3ca8f5a4d25 100644 --- a/nemo/collections/asr/losses.py +++ b/nemo/collections/asr/losses.py @@ -4,18 +4,22 @@ from nemo.backends.pytorch.nm import LossNM from nemo.core.neural_types import * +from nemo.utils.decorators import add_port_docs class CTCLossNM(LossNM): """ Neural Module wrapper for pytorch's ctcloss - Args: num_classes (int): Number of characters in ASR model's vocab/labels. This count should not include the CTC blank symbol. + zero_infinity (bool): Whether to zero infinite losses and the associated gradients. + By default, it is False. Infinite losses mainly occur when the inputs are too + short to be aligned to the targets. """ @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. """ @@ -31,20 +35,20 @@ def input_ports(self): } @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. - loss: NeuralType(None) """ # return {"loss": NeuralType(None)} return {"loss": NeuralType(elements_type=LossType())} - def __init__(self, num_classes): + def __init__(self, num_classes, zero_infinity=False): super().__init__() self._blank = num_classes - self._criterion = nn.CTCLoss(blank=self._blank, reduction='none') + self._criterion = nn.CTCLoss(blank=self._blank, reduction='none', zero_infinity=zero_infinity) def _loss(self, log_probs, targets, input_length, target_length): input_length = input_length.long() diff --git a/nemo/collections/asr/metrics.py b/nemo/collections/asr/metrics.py index e44263f11198..b0795c189a47 100644 --- a/nemo/collections/asr/metrics.py +++ b/nemo/collections/asr/metrics.py @@ -1,5 +1,7 @@ # Copyright (c) 2019 NVIDIA Corporation -from typing import List +from typing import List, Optional + +import torch def __levenshtein(a: List, b: List) -> int: @@ -30,7 +32,6 @@ def word_error_rate(hypotheses: List[str], references: List[str], use_cer=False) Computes Average Word Error rate between two texts represented as corresponding lists of string. Hypotheses and references must have same length. - Args: hypotheses: list of hypotheses references: list of references @@ -60,3 +61,39 @@ def word_error_rate(hypotheses: List[str], references: List[str], use_cer=False) else: wer = float('inf') return wer + + +def classification_accuracy( + logits: torch.Tensor, targets: torch.Tensor, top_k: Optional[List[int]] = None +) -> List[float]: + """ + Computes the top-k classification accuracy provided with + un-normalized logits of a model and ground truth targets. + If top_k is not provided, defaults to top_1 accuracy. + If top_k is provided as a list, then the values are sorted + in ascending order. + Args: + logits: Un-normalized logits of a model. Softmax will be + applied to these logits prior to computation of accuracy. + targets: Vector of integers which represent indices of class + labels. + top_k: Optional list of integers in the range [1, max_classes]. + Returns: + A list of length `top_k`, where each value represents top_i + accuracy (i in `top_k`). 
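# Usage sketch for classification_accuracy defined above; shapes and values are
# invented. With top_k=[1, 5] it returns one accuracy value per requested k.
import torch

from nemo.collections.asr.metrics import classification_accuracy

logits = torch.randn(8, 30)           # [batch, num_classes], un-normalized model outputs
targets = torch.randint(0, 30, (8,))  # integer class indices
top1, top5 = classification_accuracy(logits, targets, top_k=[1, 5])
print(f"top-1: {top1:.3f}, top-5: {top5:.3f}")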
+ """ + if top_k is None: + top_k = [1] + max_k = max(top_k) + + with torch.no_grad(): + _, predictions = logits.topk(max_k, dim=1, largest=True, sorted=True) + predictions = predictions.t() + correct = predictions.eq(targets.view(1, -1)).expand_as(predictions) + + results = [] + for k in top_k: + correct_k = correct[:k].view(-1).float().mean().to('cpu').numpy() + results.append(correct_k) + + return results diff --git a/nemo/collections/asr/models/__init__.py b/nemo/collections/asr/models/__init__.py new file mode 100644 index 000000000000..6e0ac9d8b206 --- /dev/null +++ b/nemo/collections/asr/models/__init__.py @@ -0,0 +1,3 @@ +from .asrconvctcmodel import ASRConvCTCModel, JasperNet, QuartzNet + +__all__ = ['ASRConvCTCModel', 'QuartzNet', 'JasperNet'] diff --git a/nemo/collections/asr/models/asrconvctcmodel.py b/nemo/collections/asr/models/asrconvctcmodel.py new file mode 100644 index 000000000000..8c4d4e809831 --- /dev/null +++ b/nemo/collections/asr/models/asrconvctcmodel.py @@ -0,0 +1,297 @@ +# Copyright (c) 2019-, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Dict, Iterable, List, Optional + +import nemo +from nemo import logging +from nemo.core import NeMoModel, NeuralGraph, NeuralModule, NeuralType, OperationMode, PretrainedModelInfo +from nemo.utils import maybe_download_from_cloud +from nemo.utils.decorators import add_port_docs + + +class ASRConvCTCModel(NeMoModel): + """ + Generic convolutional CTC-based model with encoder and decoder. It also contains pre-processing module and + data augmentation model. + + Example models of this type are: JasperNet and QuartzNet + """ + + def __init__( + self, + preprocessor_params: Dict, + encoder_params: Dict, + decoder_params: Dict, + spec_augment_params: Optional[Dict] = None, + ): + super().__init__() + # Instantiate necessary modules + self.__vocabulary = None + preprocessor, spec_augmentation, encoder, decoder = self.__instantiate_modules( + preprocessor_params, encoder_params, decoder_params, spec_augment_params + ) + self._operation_mode = OperationMode.training + + # self.__training_neural_graph = NeuralGraph(operation_mode=OperationMode.training) + self.__training_neural_graph = NeuralGraph(operation_mode=OperationMode.both) + with self.__training_neural_graph: + # Copy one input port definitions - using "user" port names. + self.__training_neural_graph.inputs["input_signal"] = preprocessor.input_ports["input_signal"] + self.__training_neural_graph.inputs["length"] = preprocessor.input_ports["length"] + # Bind the selected inputs. 
Connect the modules + i_processed_signal, i_processed_signal_len = preprocessor( + input_signal=self.__training_neural_graph.inputs["input_signal"], + length=self.__training_neural_graph.inputs["length"], + ) + if spec_augmentation is not None: + i_processed_signal = spec_augmentation(input_spec=i_processed_signal) + i_encoded, i_encoded_len = encoder(audio_signal=i_processed_signal, length=i_processed_signal_len) + i_log_probs = decoder(encoder_output=i_encoded) + # Bind the selected outputs. + self.__training_neural_graph.outputs["log_probs"] = i_log_probs + self.__training_neural_graph.outputs["encoded_len"] = i_encoded_len + + # self.__evaluation_neural_graph = NeuralGraph(operation_mode=OperationMode.evaluation) + self.__evaluation_neural_graph = NeuralGraph(operation_mode=OperationMode.both) + with self.__evaluation_neural_graph: + # Copy one input port definitions - using "user" port names. + self.__evaluation_neural_graph.inputs["input_signal"] = preprocessor.input_ports["input_signal"] + self.__evaluation_neural_graph.inputs["length"] = preprocessor.input_ports["length"] + # Bind the selected inputs. Connect the modules + i_processed_signal, i_processed_signal_len = preprocessor( + input_signal=self.__evaluation_neural_graph.inputs["input_signal"], + length=self.__evaluation_neural_graph.inputs["length"], + ) + # Notice lack of speck augmentation for inference + i_encoded, i_encoded_len = encoder(audio_signal=i_processed_signal, length=i_processed_signal_len) + i_log_probs = decoder(encoder_output=i_encoded) + # Bind the selected outputs. + self.__evaluation_neural_graph.outputs["log_probs"] = i_log_probs + self.__evaluation_neural_graph.outputs["encoded_len"] = i_encoded_len + + def __instantiate_modules( + self, preprocessor_params, encoder_params, decoder_params, spec_augment_params=None, + ): + preprocessor = NeuralModule.deserialize(preprocessor_params) + encoder = NeuralModule.deserialize(encoder_params) + decoder = NeuralModule.deserialize(decoder_params) + if hasattr(decoder, 'vocabulary'): + self.__vocabulary = decoder.vocabulary + else: + self.__vocabulary = None + + if spec_augment_params is not None: + spec_augmentation = NeuralModule.deserialize(spec_augment_params) + else: + spec_augmentation = None + + # Record all modules + self._modules = [] + self._preprocessor = preprocessor + self._spec_augmentation = spec_augmentation + self._encoder = encoder + self._decoder = decoder + if spec_augmentation is not None: + self._modules += [preprocessor, spec_augmentation, encoder, decoder] + else: + self._modules += [preprocessor, encoder, decoder] + + # Create input and output ports + self._input_ports = preprocessor.input_ports + self._output_ports = decoder.output_ports + self._output_ports['encoded_lengths'] = encoder.output_ports['encoded_lengths'] + return self._preprocessor, self._spec_augmentation, self._encoder, self._decoder + + @property + def train_graph(self) -> NeuralGraph: + return self.__training_neural_graph + + @property + def eval_graph(self) -> NeuralGraph: + return self.__evaluation_neural_graph + + @property + @add_port_docs() + def input_ports(self) -> Optional[Dict[str, NeuralType]]: + return self._input_ports + + @property + @add_port_docs() + def output_ports(self) -> Optional[Dict[str, NeuralType]]: + return self._output_ports + + @property + def vocabulary(self): + return self.__vocabulary + + @property + def num_weights(self): + return self._encoder.num_weights + self._decoder.num_weights + + @staticmethod + def list_pretrained_models() -> 
Optional[List[PretrainedModelInfo]]: + """List all available pre-trained models (e.g. weights) for convolutional + encoder-decoder CTC-based speech recognition models. + + Returns: + A list of PretrainedModelInfo tuples. + The pretrained_model_name field of the tuple can be used to + retrieve pre-trained model's weights (pass it as + pretrained_model_name argument to the module's constructor) + """ + logging.warning("TODO: CHANGE ME TO GRAB STUFF FROM NGC") + result = [] + model = PretrainedModelInfo( + pretrained_model_name="QuartzNet15x5-En", + location="https://nemo-public.s3.us-east-2.amazonaws.com/nemo_0.11_models_test/QuartzNet15x5-En-Base.nemo", + description="The model is trained on ~3300 hours of publicly available data and achieves a WER of 3.91% on LibriSpeech dev-clean, and a WER of 10.58% on dev-other.", + parameters="", + ) + result.append(model) + + model = PretrainedModelInfo( + pretrained_model_name="QuartzNet15x5-Zh", + location="https://nemo-public.s3.us-east-2.amazonaws.com/nemo_0.11_models_test/QuartzNet15x5-Zh-Base.nemo", + description="The model is trained on ai-shell2 mandarin chinese dataset.", + parameters="", + ) + result.append(model) + + model = PretrainedModelInfo( + pretrained_model_name="JasperNet10x5-En", + location="https://nemo-public.s3.us-east-2.amazonaws.com/nemo_0.11_models_test/JasperNet10x5-En-Base.nemo", + description="The model achieves a WER of 3.46% on LibriSpeech dev-clean, 10.40% on dev-other, 3.69% on test-clean, and 10.49% on test-other.", + parameters="", + ) + result.append(model) + return result + + @classmethod + def from_pretrained( + cls, model_info, local_rank: int = 0, refresh_cache: bool = False, new_vocab: List[str] = None + ) -> Optional[NeuralModule]: + """Instantiates a particular kind of ASRConvCTCModel from pretrained checkpoint. + Can do so from file on disk or from the NVIDIA NGC. + + Args: + model_info: Either path to ".nemo" file or a valid NGC Model name + local_rank: on which GPU to instantiate. + refresh_cache: If set to True, then when fetching from clould, this will re-fetch the file + from clould even if it is already found in a cache locally. + new_vocab: If you would like to do fine-tuning with different vocabulary, pass it here. This will keep all + weghts from the encoder (most of the network) but will randomly re-initialize the decoder with target vocab. + + Returns: + NeMoModel instance + """ + # Create destination folder: + instance = None + if model_info.endswith(".nemo"): + instance = super().from_pretrained(model_info=model_info, local_rank=local_rank) + else: + location_in_the_cloud = None + for pretrained_model_info in cls.list_pretrained_models(): + if pretrained_model_info.pretrained_model_name == model_info: + location_in_the_cloud = pretrained_model_info.location + if location_in_the_cloud is None: + raise FileNotFoundError( + f"Could not find {model_info} in the cloud. Please call list_pretrained_models() to see all available pre-trained models." 
+ ) + + filename = location_in_the_cloud.split("/")[-1] + url = location_in_the_cloud.replace(filename, "") + cache_subfolder = f"NEMO_{nemo.__version__}" + + # if file exists on cache_folder/subfolder, it will be re-used, unless refresh_cache is True + nemo_model_file_in_cache = maybe_download_from_cloud( + url=url, filename=filename, subfolder=cache_subfolder, referesh_cache=refresh_cache + ) + logging.info("Instantiating model from pre-trained checkpoint") + themodel = ASRConvCTCModel.from_pretrained(model_info=str(nemo_model_file_in_cache)) + logging.info("Model instantiated with pre-trained weights") + instance = themodel + if new_vocab is None: + return instance + else: + logging.info(f"Changing model's vocabulary to: {new_vocab}") + instance._decoder = nemo.collections.asr.JasperDecoderForCTC( + feat_in=instance._decoder._feat_in, num_classes=len(new_vocab), vocabulary=new_vocab + ) + return instance + + @property + def modules(self) -> Iterable[NeuralModule]: + return self._modules + + +class QuartzNet(ASRConvCTCModel): + """QuartzNet ASR Model. See: "QuartzNet: Deep Automatic Speech Recognition with 1D Time-Channel Separable Convolutions." + https://arxiv.org/abs/1910.10261""" + + @staticmethod + def list_pretrained_models() -> Optional[List[PretrainedModelInfo]]: + """List all available pre-trained models (e.g. weights) for convolutional + encoder-decoder CTC-based speech recognition models. + + Returns: + A list of PretrainedModelInfo tuples. + The pretrained_model_name field of the tuple can be used to + retrieve pre-trained model's weights (pass it as + pretrained_model_name argument to the module's constructor) + """ + logging.warning("TODO: CHANGE ME TO GRAB STUFF FROM NGC") + result = [] + model = PretrainedModelInfo( + pretrained_model_name="QuartzNet15x5-En", + location="https://nemo-public.s3.us-east-2.amazonaws.com/nemo_0.11_models_test/QuartzNet15x5-En-Base.nemo", + description="The model is trained on ~3300 hours of publicly available data and achieves a WER of 3.91% on LibriSpeech dev-clean, and a WER of 10.58% on dev-other.", + parameters="", + ) + result.append(model) + + model = PretrainedModelInfo( + pretrained_model_name="QuartzNet15x5-Zh", + location="https://nemo-public.s3.us-east-2.amazonaws.com/nemo_0.11_models_test/QuartzNet15x5-Zh-Base.nemo", + description="The model is trained on ai-shell2 mandarin chinese dataset.", + parameters="", + ) + result.append(model) + return result + + +class JasperNet(ASRConvCTCModel): + """QuartzNet ASR Model. See: "Jasper: An End-to-End Convolutional Neural Acoustic Model." + https://arxiv.org/abs/1904.03288""" + + @staticmethod + def list_pretrained_models() -> Optional[List[PretrainedModelInfo]]: + """List all available pre-trained models (e.g. weights) for convolutional + encoder-decoder CTC-based speech recognition models. + + Returns: + A list of PretrainedModelInfo tuples. 
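# Usage sketch for the from_pretrained() flow described above. The model name matches
# one of the PretrainedModelInfo entries listed in this file; passing new_vocab keeps
# the encoder weights but re-initializes the decoder for a different label set. The
# tiny vocabulary below is purely illustrative.
from nemo.collections.asr.models import QuartzNet

quartznet = QuartzNet.from_pretrained(model_info="QuartzNet15x5-En")

# Hypothetical fine-tuning vocabulary:
quartznet_ft = QuartzNet.from_pretrained(model_info="QuartzNet15x5-En", new_vocab=[" ", "a", "b", "c"])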
+ The pretrained_model_name field of the tuple can be used to + retrieve pre-trained model's weights (pass it as + pretrained_model_name argument to the module's constructor) + """ + logging.warning("TODO: CHANGE ME TO GRAB STUFF FROM NGC") + result = [] + model = PretrainedModelInfo( + pretrained_model_name="JasperNet10x5-En", + location="https://nemo-public.s3.us-east-2.amazonaws.com/nemo_0.11_models_test/JasperNet10x5-En-Base.nemo", + description="The model achieves a WER of 3.46% on LibriSpeech dev-clean, 10.40% on dev-other, 3.69% on test-clean, and 10.49% on test-other.", + parameters="", + ) + result.append(model) + return result diff --git a/nemo/collections/asr/parts/__init__.py b/nemo/collections/asr/parts/__init__.py index 93a31900dfa8..249a0587573c 100644 --- a/nemo/collections/asr/parts/__init__.py +++ b/nemo/collections/asr/parts/__init__.py @@ -1,4 +1,4 @@ -from .dataset import AudioDataset +from .dataset import AudioDataset, AudioLabelDataset from .features import WaveformFeaturizer -__all__ = ['AudioDataset', 'WaveformFeaturizer'] +__all__ = ['AudioDataset', 'AudioLabelDataset', 'WaveformFeaturizer'] diff --git a/nemo/collections/asr/parts/cleaners.py b/nemo/collections/asr/parts/cleaners.py index 053fa078a0e8..9310bae3ed07 100755 --- a/nemo/collections/asr/parts/cleaners.py +++ b/nemo/collections/asr/parts/cleaners.py @@ -92,7 +92,7 @@ def clean_text(string, table, punctuation_to_replace): def warn_common_chars(string): if re.search(r'[£€]', string): - logging.warning("Your transcript contains one of '£' or '€' which we do" "not currently handle") + logging.warning("Your transcript contains one of '£' or '€' which we do not currently handle") def clean_numbers(string): diff --git a/nemo/collections/asr/parts/collections.py b/nemo/collections/asr/parts/collections.py index 203f3409d236..de11bfebcf7c 100644 --- a/nemo/collections/asr/parts/collections.py +++ b/nemo/collections/asr/parts/collections.py @@ -1,12 +1,13 @@ # Copyright (c) 2019 NVIDIA Corporation import collections +import json import os -from typing import List, Optional, Union +from typing import Any, Dict, List, Optional, Union import pandas as pd -import nemo from nemo.collections.asr.parts import manifest, parsers +from nemo.utils import logging class _Collection(collections.UserList): @@ -33,7 +34,7 @@ def __init__(self, texts: List[str], parser: parsers.CharParser): tokens = parser(text) if tokens is None: - nemo.logging.warning("Fail to parse '%s' text line.", text) + logging.warning("Fail to parse '%s' text line.", text) continue data.append(output_type(tokens)) @@ -76,18 +77,22 @@ def __parse_texts(file: str) -> List[str]: class AudioText(_Collection): """List of audio-transcript text correspondence with preprocessing.""" - OUTPUT_TYPE = collections.namedtuple(typename='AudioTextEntity', field_names='audio_file duration text_tokens',) + OUTPUT_TYPE = collections.namedtuple( + typename='AudioTextEntity', field_names='audio_file duration text_tokens offset', + ) def __init__( self, audio_files: List[str], durations: List[float], texts: List[str], + offsets: List[str], parser: parsers.CharParser, min_duration: Optional[float] = None, max_duration: Optional[float] = None, max_number: Optional[int] = None, do_sort_by_duration: bool = False, + index_by_file_id: bool = False, ): """Instantiates audio-text manifest with filters and preprocessing. @@ -95,42 +100,57 @@ def __init__( audio_files: List of audio files. durations: List of float durations. texts: List of raw text transcripts. 
+ offsets: List of duration offsets or None. parser: Instance of `CharParser` to convert string to tokens. min_duration: Minimum duration to keep entry with (default: None). max_duration: Maximum duration to keep entry with (default: None). max_number: Maximum number of samples to collect. - do_sort_by_duration: True if sort samples list by duration. + do_sort_by_duration: True if sort samples list by duration. Not compatible with index_by_file_id. + index_by_file_id: If True, saves a mapping from filename base (ID) to index in data. """ output_type = self.OUTPUT_TYPE - data, duration_filtered = [], 0.0 - for audio_file, duration, text in zip(audio_files, durations, texts): + data, duration_filtered, num_filtered, total_duration = [], 0.0, 0, 0.0 + if index_by_file_id: + self.mapping = {} + + for audio_file, duration, text, offset in zip(audio_files, durations, texts, offsets): # Duration filters. if min_duration is not None and duration < min_duration: duration_filtered += duration + num_filtered += 1 continue if max_duration is not None and duration > max_duration: duration_filtered += duration + num_filtered += 1 continue text_tokens = parser(text) if text_tokens is None: duration_filtered += duration + num_filtered += 1 continue - data.append(output_type(audio_file, duration, text_tokens)) + total_duration += duration + + data.append(output_type(audio_file, duration, text_tokens, offset)) + if index_by_file_id: + file_id, _ = os.path.splitext(os.path.basename(audio_file)) + self.mapping[file_id] = len(data) - 1 # Max number of entities filter. if len(data) == max_number: break if do_sort_by_duration: - data.sort(key=lambda entity: entity.duration) + if index_by_file_id: + logging.warning("Tried to sort dataset by duration, but cannot since index_by_file_id is set.") + else: + data.sort(key=lambda entity: entity.duration) - nemo.logging.info( - "Filtered duration for loading collection is %f.", duration_filtered, - ) + logging.info("Dataset loaded with %d files totalling %.2f hours", len(data), total_duration / 3600) + logging.info("%d files were filtered totalling %.2f hours", num_filtered, duration_filtered / 3600) super().__init__(data) @@ -148,10 +168,130 @@ def __init__(self, manifests_files: Union[str, List[str]], *args, **kwargs): **kwargs: Kwargs to pass to `AudioText` constructor. """ - audio_files, durations, texts = [], [], [] + audio_files, durations, texts, offsets = [], [], [], [] for item in manifest.item_iter(manifests_files): audio_files.append(item['audio_file']) durations.append(item['duration']) texts.append(item['text']) + offsets.append(item['offset']) + + super().__init__(audio_files, durations, texts, offsets, *args, **kwargs) + + +class SpeechLabel(_Collection): + """List of audio-label correspondence with preprocessing.""" + + OUTPUT_TYPE = collections.namedtuple(typename='SpeechLabelEntity', field_names='audio_file duration label offset',) + + def __init__( + self, + audio_files: List[str], + durations: List[float], + labels: List[Union[int, str]], + offsets: List[Optional[float]], + min_duration: Optional[float] = None, + max_duration: Optional[float] = None, + max_number: Optional[int] = None, + do_sort_by_duration: bool = False, + ): + """Instantiates audio-label manifest with filters and preprocessing. + + Args: + audio_files: List of audio files. + durations: List of float durations. + labels: List of labels. + offsets: List of offsets or None. + min_duration: Minimum duration to keep entry with (default: None). 
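# Small illustration of the index_by_file_id mapping built above: when enabled, each
# entry becomes addressable by its file basename without extension. The path below is
# invented for the example.
import os

audio_file = "/data/dev_clean/2277-149896-0000.wav"
file_id, _ = os.path.splitext(os.path.basename(audio_file))
print(file_id)  # 2277-149896-0000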
+ max_duration: Maximum duration to keep entry with (default: None). + max_number: Maximum number of samples to collect. + do_sort_by_duration: True if sort samples list by duration. + """ + + output_type = self.OUTPUT_TYPE + data, duration_filtered = [], 0.0 + for audio_file, duration, command, offset in zip(audio_files, durations, labels, offsets): + # Duration filters. + if min_duration is not None and duration < min_duration: + duration_filtered += duration + continue + + if max_duration is not None and duration > max_duration: + duration_filtered += duration + continue + + data.append(output_type(audio_file, duration, command, offset)) + + # Max number of entities filter. + if len(data) == max_number: + break + + if do_sort_by_duration: + data.sort(key=lambda entity: entity.duration) + + logging.info( + "Filtered duration for loading collection is %f.", duration_filtered, + ) + self.uniq_labels = sorted(set(map(lambda x: x.label, data))) + logging.info("# {} files loaded accounting to # {} labels".format(len(data), len(self.uniq_labels))) + + super().__init__(data) + + +class ASRSpeechLabel(SpeechLabel): + """`SpeechLabel` collector from structured json files.""" + + def __init__(self, manifests_files: Union[str, List[str]], *args, **kwargs): + """Parse lists of audio files, durations and transcripts texts. + + Args: + manifests_files: Either single string file or list of such - + manifests to yield items from. + *args: Args to pass to `SpeechLabel` constructor. + **kwargs: Kwargs to pass to `SpeechLabel` constructor. + """ + audio_files, durations, labels, offsets = [], [], [], [] + + for item in manifest.item_iter(manifests_files, parse_func=self.__parse_item): + audio_files.append(item['audio_file']) + durations.append(item['duration']) + labels.append(item['label']) + offsets.append(item['offset']) + + super().__init__(audio_files, durations, labels, offsets, *args, **kwargs) + + def __parse_item(self, line: str, manifest_file: str) -> Dict[str, Any]: + item = json.loads(line) + + # Audio file + if 'audio_filename' in item: + item['audio_file'] = item.pop('audio_filename') + elif 'audio_filepath' in item: + item['audio_file'] = item.pop('audio_filepath') + else: + raise ValueError( + f"Manifest file has invalid json line " f"structure: {line} without proper audio file key." + ) + item['audio_file'] = os.path.expanduser(item['audio_file']) + + # Duration. + if 'duration' not in item: + raise ValueError(f"Manifest file has invalid json line " f"structure: {line} without proper duration key.") + + # Label. 
+ if 'command' in item: + item['label'] = item.pop('command') + elif 'target' in item: + item['label'] = item.pop('target') + elif 'label' in item: + pass + else: + raise ValueError(f"Manifest file has invalid json line " f"structure: {line} without proper label key.") + + item = dict( + audio_file=item['audio_file'], + duration=item['duration'], + label=item['label'], + offset=item.get('offset', None), + ) - super().__init__(audio_files, durations, texts, *args, **kwargs) + return item diff --git a/nemo/collections/asr/parts/dataset.py b/nemo/collections/asr/parts/dataset.py index 7f8107e5c9d3..6042a8bd3b0d 100644 --- a/nemo/collections/asr/parts/dataset.py +++ b/nemo/collections/asr/parts/dataset.py @@ -53,6 +53,55 @@ def seq_collate_fn(batch, token_pad_value=0): return audio_signal, audio_lengths, tokens, tokens_lengths +def fixed_seq_collate_fn(batch, fixed_length=16000): + """collate batch of audio sig, audio len, tokens, tokens len + + Args: + batch (Optional[FloatTensor], Optional[LongTensor], LongTensor, + LongTensor): A tuple of tuples of signal, signal lengths, + encoded tokens, and encoded tokens length. This collate func + assumes the signals are 1d torch tensors (i.e. mono audio). + fixed_length (Optional[int]): length of input signal to be considered + + """ + _, audio_lengths, _, tokens_lengths = zip(*batch) + + has_audio = audio_lengths[0] is not None + fixed_length = min(fixed_length, max(audio_lengths)) + + audio_signal, tokens = [], [] + for sig, sig_len, tokens_i, _ in batch: + if has_audio: + sig_len = sig_len.item() + chunck_len = sig_len - fixed_length + if chunck_len < 0: + # pad = (0,fixed_length-sig_len) + # signal = torch.nn.functional.pad(sig,pad) + repeat = fixed_length // sig_len + rem = fixed_length % sig_len + sub = sig[-rem:] if rem > 0 else torch.tensor([]) + rep_sig = torch.cat(repeat * [sig]) + signal = torch.cat((rep_sig, sub)) + # print(sig_len,repeat,rem,len(sub),len(rep_sig),len(signal)) + else: + start_idx = torch.randint(0, chunck_len, (1,)) if chunck_len else torch.tensor(0) + end_idx = start_idx + fixed_length + signal = sig[start_idx:end_idx] + + audio_signal.append(signal) + tokens.append(tokens_i) + + if has_audio: + audio_signal = torch.stack(audio_signal) + audio_lengths = torch.stack(audio_lengths) + else: + audio_signal, audio_lengths = None, None + tokens = torch.stack(tokens) + tokens_lengths = torch.stack(tokens_lengths) + + return audio_signal, audio_lengths, tokens, tokens_lengths + + def audio_seq_collate_fn(batch): """ Collate a batch (iterable of (sample tensor, label tensor) tuples) into @@ -96,7 +145,7 @@ class AudioDataset(Dataset): "/path/to/audio.txt", "duration": 23.147} ... 
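# Stand-alone sketch of the fixed-length logic inside fixed_seq_collate_fn above:
# a signal shorter than fixed_length is tiled and topped up with its last samples,
# while a longer one gets a random crop. The toy signal is for illustration only.
import torch

def to_fixed_length(sig: torch.Tensor, fixed_length: int) -> torch.Tensor:
    sig_len = sig.size(0)
    if sig_len < fixed_length:
        repeat, rem = fixed_length // sig_len, fixed_length % sig_len
        sub = sig[-rem:] if rem > 0 else torch.tensor([])
        return torch.cat(repeat * [sig] + [sub])
    start_idx = int(torch.randint(0, sig_len - fixed_length, (1,))) if sig_len > fixed_length else 0
    return sig[start_idx : start_idx + fixed_length]

print(to_fixed_length(torch.arange(5.0), fixed_length=8))  # tensor([0., 1., 2., 3., 4., 2., 3., 4.])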
{"audio_filepath": "/path/to/audio.wav", "text": "the - transcription", offset": 301.75, "duration": 0.82, "utt": + transcription", "offset": 301.75, "duration": 0.82, "utt": "utterance_id", "ctm_utt": "en_4156", "side": "A"} Args: @@ -132,11 +181,12 @@ def __init__( bos_id=None, eos_id=None, load_audio=True, + parser='en', ): self.collection = collections.ASRAudioText( manifests_files=manifest_filepath.split(','), - parser=parsers.ENCharParser( - labels=labels, unk_id=unk_index, blank_id=blank_index, do_normalize=normalize, + parser=parsers.make_parser( + labels=labels, name=parser, unk_id=unk_index, blank_id=blank_index, do_normalize=normalize, ), min_duration=min_duration, max_duration=max_duration, @@ -152,7 +202,14 @@ def __init__( def __getitem__(self, index): sample = self.collection[index] if self.load_audio: - features = self.featurizer.process(sample.audio_file, offset=0, duration=sample.duration, trim=self.trim,) + offset = sample.offset + + if offset is None: + offset = 0 + + features = self.featurizer.process( + sample.audio_file, offset=offset, duration=sample.duration, trim=self.trim, + ) f, fl = features, torch.tensor(features.shape[0]).long() else: f, fl = None, None @@ -330,3 +387,82 @@ def __getitem__(self, item): torch.tensor(tokenized_text, dtype=torch.long), torch.tensor(len(tokenized_text), dtype=torch.long), ) + + +class AudioLabelDataset(Dataset): + """ + Dataset that loads tensors via a json file containing paths to audio + files, command class, and durations (in seconds). Each new line is a + different sample. Example below: + + {"audio_filepath": "/path/to/audio.wav", "label": + "label", "duration": 23.147} + ... + {"audio_filepath": "/path/to/audio.wav", "label": "label", + "offset": 301.75, "duration": 0.82} + + Args: + manifest_filepath: Path to manifest json as described above. Can + be comma-separated paths. + labels (Optional[list]): String containing all the possible labels to map to + if None then automatically picks from ASRSpeechLabel collection. 
+ featurizer: Initialized featurizer class that converts paths of + audio to feature tensors + max_duration: If audio exceeds this length, do not include in dataset + min_duration: If audio is less than this length, do not include + in dataset + trim: Boolean flag whether to trim the audio + load_audio: Boolean flag indicate whether do or not load audio + """ + + def __init__( + self, + manifest_filepath, + featurizer, + labels=None, + max_duration=None, + min_duration=None, + trim=False, + load_audio=True, + ): + self.collection = collections.ASRSpeechLabel( + manifests_files=manifest_filepath.split(','), min_duration=min_duration, max_duration=max_duration, + ) + + self.featurizer = featurizer + self.trim = trim + self.load_audio = load_audio + + self.labels = labels if labels else self.collection.uniq_labels + self.num_commands = len(self.labels) + + self.label2id, self.id2label = {}, {} + for label_id, label in enumerate(self.labels): + self.label2id[label] = label_id + self.id2label[label_id] = label + + for idx in range(len(self.labels[:5])): + logging.debug(" label id {} and its mapped label {}".format(idx, self.id2label[idx])) + + def __getitem__(self, index): + sample = self.collection[index] + if self.load_audio: + offset = sample.offset + + if offset is None: + offset = 0 + + features = self.featurizer.process( + sample.audio_file, offset=offset, duration=sample.duration, trim=self.trim + ) + f, fl = features, torch.tensor(features.shape[0]).long() + else: + f, fl = None, None + + t = self.label2id[sample.label] + tl = 1 # For compatibility with collate_fn used later + + return f, fl, torch.tensor(t).long(), torch.tensor(tl).long() + + def __len__(self): + return len(self.collection) diff --git a/nemo/collections/asr/parts/jasper.py b/nemo/collections/asr/parts/jasper.py index 574926db4356..a07fd1fb3b50 100644 --- a/nemo/collections/asr/parts/jasper.py +++ b/nemo/collections/asr/parts/jasper.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import List, Optional, Tuple +from typing import Callable, List, Optional, Tuple import torch import torch.nn as nn @@ -28,7 +28,7 @@ def init_weights(m, mode='xavier_uniform'): if isinstance(m, MaskedConv1d): init_weights(m.conv, mode) - if isinstance(m, nn.Conv1d): + if isinstance(m, (nn.Conv1d, nn.Linear)): if mode == 'xavier_uniform': nn.init.xavier_uniform_(m.weight, gain=1.0) elif mode == 'xavier_normal': @@ -49,6 +49,14 @@ def init_weights(m, mode='xavier_uniform'): nn.init.zeros_(m.bias) +def compute_new_kernel_size(kernel_size, kernel_width): + new_kernel_size = max(int(kernel_size * kernel_width), 1) + # If kernel is even shape, round up to make it odd + if new_kernel_size % 2 == 0: + new_kernel_size += 1 + return new_kernel_size + + def get_same_padding(kernel_size, stride, dilation): if stride > 1 and dilation > 1: raise ValueError("Only stride OR dilation may be greater than 1") @@ -57,6 +65,34 @@ def get_same_padding(kernel_size, stride, dilation): return kernel_size // 2 +class StatsPoolLayer(nn.Module): + def __init__(self, gram=False, super_vector=False): + super().__init__() + self.gram = gram + self.super = super_vector + + def forward(self, encoder_output): + + mean = encoder_output.mean(dim=-1) # Time Axis + std = encoder_output.std(dim=-1) + + pooled = torch.cat([mean, std], dim=-1) + + if self.gram: + time_len = encoder_output.shape[-1] + # encoder_output = encoder_output + cov = encoder_output.bmm(encoder_output.transpose(2, 1)) # cov matrix + cov = cov.view(cov.shape[0], -1) / (time_len) + + if self.gram and not self.super: + return cov + + if self.super and self.gram: + pooled = torch.cat([pooled, cov], dim=-1) + + return pooled + + class MaskedConv1d(nn.Module): __constants__ = ["use_conv_mask", "real_out_channels", "heads"] @@ -142,6 +178,69 @@ def forward(self, x): return x +class SqueezeExcite(nn.Module): + def __init__( + self, + channels: int, + reduction_ratio: int, + context_window: int = -1, + interpolation_mode: str = 'nearest', + activation: Optional[Callable] = None, + ): + """ + Squeeze-and-Excitation sub-module. + + Args: + channels: Input number of channels. + reduction_ratio: Reduction ratio for "squeeze" layer. + context_window: Integer number of timesteps that the context + should be computed over, using stride 1 average pooling. + If value < 1, then global context is computed. + interpolation_mode: Interpolation mode of timestep dimension. + Used only if context window is > 1. + The modes available for resizing are: `nearest`, `linear` (3D-only), + `bilinear`, `area` + activation: Intermediate activation function used. Must be a + callable activation function. 
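# Worked examples for the kernel rescaling introduced above (compute_new_kernel_size):
# the kernel is scaled by kernel_size_factor, floored by int(), clamped to >= 1, and
# bumped to the next odd value so that 'same' padding remains symmetric.
from nemo.collections.asr.parts.jasper import compute_new_kernel_size

print(compute_new_kernel_size(33, 0.5))   # int(16.5) = 16 -> even, bumped to 17
print(compute_new_kernel_size(11, 0.25))  # int(2.75) = 2  -> even, bumped to 3
print(compute_new_kernel_size(3, 0.1))    # int(0.3)  = 0  -> clamped to 1 (odd, kept)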
+ """ + super(SqueezeExcite, self).__init__() + self.context_window = int(context_window) + self.interpolation_mode = interpolation_mode + + if self.context_window <= 0: + self.pool = nn.AdaptiveAvgPool1d(1) # context window = T + else: + self.pool = nn.AvgPool1d(self.context_window, stride=1) + + if activation is None: + activation = nn.ReLU(inplace=True) + + self.fc = nn.Sequential( + nn.Linear(channels, channels // reduction_ratio, bias=False), + activation, + nn.Linear(channels // reduction_ratio, channels, bias=False), + ) + + def forward(self, x): + batch, channels, timesteps = x.size() + y = self.pool(x) # [B, C, T - context_window + 1] + y = y.transpose(1, 2) # [B, T - context_window + 1, C] + y = self.fc(y) # [B, T - context_window + 1, C] + y = y.transpose(1, 2) # [B, C, T - context_window + 1] + + if self.context_window > 0: + y = torch.nn.functional.interpolate(y, size=timesteps, mode=self.interpolation_mode) + + y = torch.sigmoid(y) + + return x * y + + +class Swish(nn.Module): + def forward(self, x): + return x * torch.sigmoid(x) + + class JasperBlock(nn.Module): __constants__ = ["conv_mask", "separable", "residual_mode", "res", "mconv"] @@ -151,6 +250,7 @@ def __init__( planes, repeat=3, kernel_size=11, + kernel_size_factor=1, stride=1, dilation=1, padding='same', @@ -165,27 +265,45 @@ def __init__( residual_mode='add', residual_panes=[], conv_mask=False, + se=False, + se_reduction_ratio=16, + se_context_window=None, + se_interpolation_mode='nearest', + stride_last=False, ): super(JasperBlock, self).__init__() if padding != "same": raise ValueError("currently only 'same' padding is supported") + kernel_size_factor = float(kernel_size_factor) + if type(kernel_size) in (list, tuple): + kernel_size = [compute_new_kernel_size(k, kernel_size_factor) for k in kernel_size] + else: + kernel_size = compute_new_kernel_size(kernel_size, kernel_size_factor) + padding_val = get_same_padding(kernel_size[0], stride[0], dilation[0]) self.conv_mask = conv_mask self.separable = separable self.residual_mode = residual_mode + self.se = se inplanes_loop = inplanes conv = nn.ModuleList() for _ in range(repeat - 1): + # Stride last means only the last convolution in block will have stride + if stride_last: + stride_val = [1] + else: + stride_val = stride + conv.extend( self._get_conv_bn_layer( inplanes_loop, planes, kernel_size=kernel_size, - stride=stride, + stride=stride_val, dilation=dilation, padding=padding_val, groups=groups, @@ -216,6 +334,17 @@ def __init__( ) ) + if se: + conv.append( + SqueezeExcite( + planes, + reduction_ratio=se_reduction_ratio, + context_window=se_context_window, + interpolation_mode=se_interpolation_mode, + activation=activation, + ) + ) + self.mconv = conv res_panes = residual_panes.copy() @@ -223,17 +352,29 @@ def __init__( if residual: res_list = nn.ModuleList() + + if residual_mode == 'stride_add': + stride_val = stride + else: + stride_val = [1] + if len(residual_panes) == 0: res_panes = [inplanes] self.dense_residual = False for ip in res_panes: - res_list.append( - nn.ModuleList( - self._get_conv_bn_layer( - ip, planes, kernel_size=1, normalization=normalization, norm_groups=norm_groups, - ) + res = nn.ModuleList( + self._get_conv_bn_layer( + ip, + planes, + kernel_size=1, + normalization=normalization, + norm_groups=norm_groups, + stride=stride_val, ) ) + + res_list.append(res) + self.res = res_list else: self.res = None @@ -388,7 +529,7 @@ def forward(self, input_: Tuple[List[Tensor], Optional[Tensor]]): else: res_out = res_layer(res_out) - if 
self.residual_mode == 'add': + if self.residual_mode == 'add' or self.residual_mode == 'stride_add': out = out + res_out else: out = torch.max(out, res_out) @@ -399,3 +540,7 @@ def forward(self, input_: Tuple[List[Tensor], Optional[Tensor]]): return xs + [out], lens return [out], lens + + +# Register swish activation function +jasper_activations['swish'] = Swish diff --git a/nemo/collections/asr/parts/manifest.py b/nemo/collections/asr/parts/manifest.py index 5107dbe4062e..5ff73822ce42 100644 --- a/nemo/collections/asr/parts/manifest.py +++ b/nemo/collections/asr/parts/manifest.py @@ -1,24 +1,26 @@ # Copyright (c) 2019 NVIDIA Corporation import json from os.path import expanduser -from typing import Any, Dict, Iterator, List, Union +from typing import Any, Callable, Dict, Iterator, List, Optional, Union class ManifestBase: def __init__(self, *args, **kwargs): raise ValueError( - "This class is deprecated, look at " "https://github.com/NVIDIA/NeMo/pull/284 for " "correct behaviour." + "This class is deprecated, look at https://github.com/NVIDIA/NeMo/pull/284 for correct behaviour." ) class ManifestEN: def __init__(self, *args, **kwargs): raise ValueError( - "This class is deprecated, look at " "https://github.com/NVIDIA/NeMo/pull/284 for " "correct behaviour." + "This class is deprecated, look at https://github.com/NVIDIA/NeMo/pull/284 for correct behaviour." ) -def item_iter(manifests_files: Union[str, List[str]]) -> Iterator[Dict[str, Any]]: +def item_iter( + manifests_files: Union[str, List[str]], parse_func: Callable[[str, Optional[str]], Dict[str, Any]] = None +) -> Iterator[Dict[str, Any]]: """Iterate through json lines of provided manifests. NeMo ASR pipelines often assume certain manifest files structure. In @@ -32,6 +34,10 @@ def item_iter(manifests_files: Union[str, List[str]]) -> Iterator[Dict[str, Any] manifests_files: Either single string file or list of such - manifests to yield items from. + parse_func: A callable function which accepts as input a single line + of a manifest and optionally the manifest file itself, + and parses it, returning a dictionary mapping from str -> Any. + Yields: Parsed key to value item dicts. @@ -42,10 +48,13 @@ def item_iter(manifests_files: Union[str, List[str]]) -> Iterator[Dict[str, Any] if isinstance(manifests_files, str): manifests_files = [manifests_files] + if parse_func is None: + parse_func = __parse_item + for manifest_file in manifests_files: with open(expanduser(manifest_file), 'r') as f: for line in f: - item = __parse_item(line, manifest_file) + item = parse_func(line, manifest_file) yield item diff --git a/nemo/collections/asr/parts/numba_utils.py b/nemo/collections/asr/parts/numba_utils.py new file mode 100644 index 000000000000..f7685e0b4ea9 --- /dev/null +++ b/nemo/collections/asr/parts/numba_utils.py @@ -0,0 +1,93 @@ +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
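# --- Illustrative sketch (not part of the patch) --------------------------------
# item_iter() in manifest.py above now accepts an optional parse_func. A minimal
# custom parser, assuming one JSON object per manifest line; the key names and the
# manifest path below are hypothetical, not mandated by the API.
import json

from nemo.collections.asr.parts.manifest import item_iter


def my_parse_func(line, manifest_file=None):
    record = json.loads(line)
    # Return whatever dictionary downstream consumers of item_iter() expect.
    return {'audio_file': record['audio_filepath'], 'text': record.get('text', '')}


# for item in item_iter('train_manifest.json', parse_func=my_parse_func):
#     print(item['audio_file'])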
+import numpy as np +from numba import jit + + +def phase_vocoder(D: np.ndarray, rate: float, phi_advance: np.ndarray, scale_buffer: np.ndarray): + """ + Optimized implementation of phase vocoder from Librosa. + + Reference implementation: + - https://librosa.github.io/librosa/generated/librosa.core.phase_vocoder.html + + Args: + D: Complex spectograms of shape [d, t, complex=2]. + rate: Speed rate, must be float greater than 0. + phi_advance: Precomputed phase advance buffer array of length [n_fft + 1] + scale_buffer: Precomputed numpy buffer array of length [n_fft + 1] + + Returns: + Complex64 ndarray of shape [d, t / rate, complex=2] + """ + time_steps = np.arange(0, D.shape[1], rate, dtype=np.float) + + # Create an empty output array + d_stretch = np.zeros((D.shape[0], len(time_steps)), D.dtype, order='F') + + # Phase accumulator; initialize to the first sample + phase_acc = np.angle(D[:, 0]) + + # Pad 0 columns to simplify boundary logic + D = np.pad(D, [(0, 0), (0, 2)], mode='constant') + + d_stretch = _phase_vocoder_kernel(D, time_steps, phi_advance, d_stretch, phase_acc, scale_buffer) + + return d_stretch + + +@jit(nopython=True, nogil=True) +def _phase_vocoder_kernel(D, time_steps, phi_advance, d_stretch, phase_acc, scale_buffer): + """ + Numba optimized kernel to compute the phase vocoder step. + + Args: + D: Complex spectograms of shape [d, t, complex=2]. + rate: Speed rate, must be float greater than 0. + time_steps: Numpy ndarray of linearly spaced time steps, shape = [t] + phi_advance: Precomputed phase advance buffer array of length [n_fft + 1] + d_stretch: Output complex matrix of shape [d, t / rate, complex=2] + phase_acc: Phase accumulator initialized to first sample of shape [d, complex=2] + scale_buffer: Precomputed numpy buffer array of length [n_fft + 1] + + Returns: + Complex64 ndarray of shape [d, t / rate, complex=2] + """ + two_pi = 2.0 * np.pi + + for (t, step) in enumerate(time_steps): + columns = D[:, int(step) : int(step + 2)] + columns_0 = columns[:, 0] + columns_1 = columns[:, 1] + + # Weighting for linear magnitude interpolation + alpha = np.mod(step, 1.0) + mag = (1.0 - alpha) * np.abs(columns_0) + alpha * np.abs(columns_1) + + # Store to output array + d_stretch[:, t] = mag * np.exp(1.0j * phase_acc) + + # Compute phase advance + dphase = np.angle(columns_1) - np.angle(columns_0) - phi_advance + + # Wrap to -pi:pi range + scale = dphase / two_pi + np.round(scale, 0, scale_buffer) + + dphase = dphase - two_pi * scale_buffer + + # Accumulate phase + phase_acc += phi_advance + dphase + + return d_stretch diff --git a/nemo/collections/asr/parts/perturb.py b/nemo/collections/asr/parts/perturb.py index 63e062e44bbd..f52635fbec06 100644 --- a/nemo/collections/asr/parts/perturb.py +++ b/nemo/collections/asr/parts/perturb.py @@ -3,12 +3,20 @@ import random import librosa +import numpy as np from scipy import signal from nemo import logging from nemo.collections.asr.parts import collections, parsers from nemo.collections.asr.parts.segment import AudioSegment +try: + from nemo.collections.asr.parts import numba_utils + + HAVE_NUMBA = True +except (ImportError, ModuleNotFoundError): + HAVE_NUMBA = False + class Perturbation(object): def max_augmentation_length(self, length): @@ -19,20 +27,174 @@ def perturb(self, data): class SpeedPerturbation(Perturbation): - def __init__(self, min_speed_rate=0.85, max_speed_rate=1.15, rng=None): + def __init__(self, sr, resample_type, min_speed_rate=0.9, max_speed_rate=1.1, num_rates=5, rng=None): + """ + Performs Speed 
Augmentation by re-sampling the data to a different sampling rate,
+        which does not preserve pitch.
+
+        Note: This is a very slow operation for online augmentation. If space allows,
+        it is preferable to pre-compute and save the files to augment the dataset.
+
+        Args:
+            sr: Original sampling rate.
+            resample_type: Type of resampling operation that will be performed.
+                For better speed using `resampy`'s fast resampling method, use `resample_type='kaiser_fast'`.
+                For high-quality resampling, set `resample_type='kaiser_best'`.
+                To use `scipy.signal.resample`, set `resample_type='fft'` or `resample_type='scipy'`
+            min_speed_rate: Minimum sampling rate modifier.
+            max_speed_rate: Maximum sampling rate modifier.
+            num_rates: Number of discrete rates to allow. Can be a positive or negative
+                integer.
+                If a positive integer greater than 0 is provided, the range of
+                speed rates will be discretized into `num_rates` values.
+                If a negative integer or 0 is provided, the full range of speed rates
+                will be sampled uniformly.
+                Note: If a positive integer is provided and the resultant discretized
+                range of rates contains the value '1.0', then those samples with rate=1.0
+                will not be augmented at all and will simply be skipped. This is to avoid
+                unnecessary augmentation and increased computation time. The effective
+                augmentation chance in such a case is `prob * (num_rates - 1) / num_rates * 100`%,
+                where `prob` is the global probability of a sample being augmented.
+            rng: Random seed number.
+        """
+        min_rate = min(min_speed_rate, max_speed_rate)
+        if min_rate < 0.0:
+            raise ValueError("Minimum sampling rate modifier must be > 0.")
+
+        if resample_type not in ('kaiser_best', 'kaiser_fast', 'fft', 'scipy'):
+            raise ValueError("Supported `resample_type` values are ('kaiser_best', 'kaiser_fast', 'fft', 'scipy')")
+
+        self._sr = sr
+        self._min_rate = min_speed_rate
+        self._max_rate = max_speed_rate
+        self._num_rates = num_rates
+        if num_rates > 0:
+            self._rates = np.linspace(self._min_rate, self._max_rate, self._num_rates, endpoint=True)
+        self._res_type = resample_type
+        self._rng = random.Random() if rng is None else rng
+
+    def max_augmentation_length(self, length):
+        return length * self._max_rate
+
+    def perturb(self, data):
+        # Select speed rate either from choice or random sample
+        if self._num_rates < 0:
+            speed_rate = self._rng.uniform(self._min_rate, self._max_rate)
+        else:
+            speed_rate = self._rng.choice(self._rates)
+
+        # Skip perturbation in case of identity speed rate
+        if speed_rate == 1.0:
+            return
+
+        new_sr = int(self._sr * speed_rate)
+        data._samples = librosa.core.resample(data._samples, self._sr, new_sr, res_type=self._res_type)
+
+
+class TimeStretchPerturbation(Perturbation):
+    def __init__(self, min_speed_rate=0.9, max_speed_rate=1.1, num_rates=5, n_fft=512, rng=None):
+        """
+        Time-stretch an audio series by a fixed rate while preserving pitch, based on [1, 2].
+
+        Note:
+            This is a simplified implementation, intended primarily for reference and pedagogical purposes.
+            It makes no attempt to handle transients, and is likely to produce audible artifacts.
+
+        Reference
+        [1] [Ellis, D. P. W. “A phase vocoder in Matlab.” Columbia University, 2002.]
+            (http://www.ee.columbia.edu/~dpwe/resources/matlab/pvoc/)
+        [2] [librosa.effects.time_stretch]
+            (https://librosa.github.io/librosa/generated/librosa.effects.time_stretch.html)
+
+        Args:
+            min_speed_rate: Minimum sampling rate modifier.
+            max_speed_rate: Maximum sampling rate modifier.
+            num_rates: Number of discrete rates to allow.
Can be a positive or negative + integer. + If a positive integer greater than 0 is provided, the range of + speed rates will be discretized into `num_rates` values. + If a negative integer or 0 is provided, the full range of speed rates + will be sampled uniformly. + Note: If a positive integer is provided and the resultant discretized + range of rates contains the value '1.0', then those samples with rate=1.0, + will not be augmented at all and simply skipped. This is to avoid unnecessary + augmentation and increase computation time. Effective augmentation chance + in such a case is = `prob * (num_rates - 1 / num_rates) * 100`% chance + where `prob` is the global probability of a sample being augmented. + n_fft: Number of fft filters to be computed. + rng: Random seed number. + """ + min_rate = min(min_speed_rate, max_speed_rate) + if min_rate < 0.0: + raise ValueError("Minimum sampling rate modifier must be > 0.") + self._min_rate = min_speed_rate self._max_rate = max_speed_rate + self._num_rates = num_rates + if num_rates > 0: + self._rates = np.linspace(self._min_rate, self._max_rate, self._num_rates, endpoint=True) self._rng = random.Random() if rng is None else rng + # Pre-compute constants + self._n_fft = int(n_fft) + self._hop_length = int(n_fft // 2) + + # Pre-allocate buffers + self._phi_advance_fast = np.linspace(0, np.pi * self._hop_length, self._hop_length + 1) + self._scale_buffer_fast = np.empty(self._hop_length + 1, dtype=np.float32) + + self._phi_advance_slow = np.linspace(0, np.pi * self._n_fft, self._n_fft + 1) + self._scale_buffer_slow = np.empty(self._n_fft + 1, dtype=np.float32) + def max_augmentation_length(self, length): return length * self._max_rate def perturb(self, data): - speed_rate = self._rng.uniform(self._min_rate, self._max_rate) - if speed_rate <= 0: - raise ValueError("speed_rate should be greater than zero.") - logging.debug("speed: %f", speed_rate) - data._samples = librosa.effects.time_stretch(data._samples, speed_rate) + # Select speed rate either from choice or random sample + if self._num_rates < 0: + speed_rate = self._rng.uniform(self._min_rate, self._max_rate) + else: + speed_rate = self._rng.choice(self._rates) + + # Skip perturbation in case of identity speed rate + if speed_rate == 1.0: + return + + # Increase `n_fft` based on task (speed up or slow down audio) + # This greatly reduces upper bound of maximum time taken + # to compute slowed down audio segments. 
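# --- Illustrative sketch (not part of the patch) --------------------------------
# Stand-alone call pattern for the numba phase_vocoder added in numba_utils.py,
# using the same buffer layout that TimeStretchPerturbation precomputes above.
# The synthetic waveform, sample rate and rate value are arbitrary placeholders.
import librosa
import numpy as np

from nemo.collections.asr.parts.numba_utils import phase_vocoder

n_fft = 512
hop_length = n_fft // 2
rate = 1.1                                           # > 1.0 speeds the audio up

samples = np.random.randn(4 * 16000).astype(np.float32)   # ~4 s of noise at 16 kHz
stft = librosa.core.stft(samples, n_fft=n_fft, hop_length=hop_length)

phi_advance = np.linspace(0, np.pi * hop_length, hop_length + 1)
scale_buffer = np.empty(hop_length + 1, dtype=np.float32)

stft_stretch = phase_vocoder(stft, rate, phi_advance, scale_buffer)
y_stretch = librosa.core.istft(
    stft_stretch, hop_length=hop_length, length=int(round(len(samples) / rate))
)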
+ if speed_rate >= 1.0: # Speed up audio + fft_multiplier = 1 + phi_advance = self._phi_advance_fast + scale_buffer = self._scale_buffer_fast + + else: # Slow down audio + fft_multiplier = 2 + phi_advance = self._phi_advance_slow + scale_buffer = self._scale_buffer_slow + + n_fft = int(self._n_fft * fft_multiplier) + hop_length = int(self._hop_length * fft_multiplier) + + # Perform short-term Fourier transform (STFT) + stft = librosa.core.stft(data._samples, n_fft=n_fft, hop_length=hop_length) + + # Stretch by phase vocoding + if HAVE_NUMBA: + stft_stretch = numba_utils.phase_vocoder(stft, speed_rate, phi_advance, scale_buffer) + + else: + stft_stretch = librosa.core.phase_vocoder(stft, speed_rate, hop_length) + + # Predict the length of y_stretch + len_stretch = int(round(len(data._samples) / speed_rate)) + + # Invert the STFT + y_stretch = librosa.core.istft( + stft_stretch, dtype=data._samples.dtype, hop_length=hop_length, length=len_stretch + ) + + data._samples = y_stretch class GainPerturbation(Perturbation): @@ -43,7 +205,7 @@ def __init__(self, min_gain_dbfs=-10, max_gain_dbfs=10, rng=None): def perturb(self, data): gain = self._rng.uniform(self._min_gain_dbfs, self._max_gain_dbfs) - logging.debug("gain: %d", gain) + # logging.debug("gain: %d", gain) data._samples = data._samples * (10.0 ** (gain / 20.0)) @@ -54,9 +216,10 @@ def __init__(self, manifest_path=None, rng=None): def perturb(self, data): impulse_record = self._rng.sample(self._manifest.data, 1)[0] - impulse = AudioSegment.from_file(impulse_record['audio_filepath'], target_sr=data.sample_rate) - logging.debug("impulse: %s", impulse_record['audio_filepath']) - data._samples = signal.fftconvolve(data.samples, impulse.samples, "full") + impulse = AudioSegment.from_file(impulse_record.audio_file, target_sr=data.sample_rate) + # logging.debug("impulse: %s", impulse_record['audio_filepath']) + impulse_norm = (impulse.samples - min(impulse.samples)) / (max(impulse.samples) - min(impulse.samples)) + data._samples = signal.fftconvolve(data._samples, impulse_norm, "same") class ShiftPerturbation(Perturbation): @@ -71,7 +234,7 @@ def perturb(self, data): # TODO: do something smarter than just ignore this condition return shift_samples = int(shift_ms * data.sample_rate // 1000) - logging.debug("shift: %s", shift_samples) + # logging.debug("shift: %s", shift_samples) if shift_samples < 0: data._samples[-shift_samples:] = data._samples[:shift_samples] data._samples[:-shift_samples] = 0 @@ -93,28 +256,58 @@ def __init__( def perturb(self, data): snr_db = self._rng.uniform(self._min_snr_db, self._max_snr_db) noise_record = self._rng.sample(self._manifest.data, 1)[0] - noise = AudioSegment.from_file(noise_record['audio_filepath'], target_sr=data.sample_rate) + noise = AudioSegment.from_file(noise_record.audio_file, target_sr=data.sample_rate) noise_gain_db = min(data.rms_db - noise.rms_db - snr_db, self._max_gain_db) - logging.debug("noise: %s %s %s", snr_db, noise_gain_db, noise_record['audio_filepath']) + # logging.debug("noise: %s %s %s", snr_db, noise_gain_db, noise_record.audio_file) # calculate noise segment to use start_time = self._rng.uniform(0.0, noise.duration - data.duration) - noise.subsegment(start_time=start_time, end_time=start_time + data.duration) + if noise.duration > (start_time + data.duration): + noise.subsegment(start_time=start_time, end_time=start_time + data.duration) # adjust gain for snr purposes and superimpose noise.gain_db(noise_gain_db) - data._samples = data._samples + noise.samples + + if 
noise._samples.shape[0] < data._samples.shape[0]: + noise_idx = self._rng.randint(0, data._samples.shape[0] - noise._samples.shape[0]) + data._samples[noise_idx : noise_idx + noise._samples.shape[0]] += noise._samples + + else: + data._samples += noise._samples + + +class WhiteNoisePerturbation(Perturbation): + def __init__(self, min_level=-90, max_level=-46, rng=None): + self.min_level = int(min_level) + self.max_level = int(max_level) + self._rng = np.random.RandomState() if rng is None else rng + + def perturb(self, data): + noise_level_db = self._rng.randint(self.min_level, self.max_level, dtype='int32') + noise_signal = self._rng.randn(data._samples.shape[0]) * (10.0 ** (noise_level_db / 20.0)) + data._samples += noise_signal perturbation_types = { "speed": SpeedPerturbation, + "time_stretch": TimeStretchPerturbation, "gain": GainPerturbation, "impulse": ImpulsePerturbation, "shift": ShiftPerturbation, "noise": NoisePerturbation, + "white_noise": WhiteNoisePerturbation, } +def register_perturbation(name: str, perturbation: Perturbation): + if name in perturbation_types.keys(): + raise KeyError( + f"Perturbation with the name {name} exists. " f"Type of perturbation : {perturbation_types[name]}." + ) + + perturbation_types[name] = perturbation + + class AudioAugmentor(object): def __init__(self, perturbations=None, rng=None): self._rng = random.Random() if rng is None else rng diff --git a/nemo/collections/asr/parts/segment.py b/nemo/collections/asr/parts/segment.py index fba34b196f78..fbe094e3eafb 100644 --- a/nemo/collections/asr/parts/segment.py +++ b/nemo/collections/asr/parts/segment.py @@ -49,7 +49,7 @@ def __ne__(self, other): def __str__(self): """Return human-readable representation of segment.""" - return "%s: num_samples=%d, sample_rate=%d, duration=%.2fsec, " "rms=%.2fdB" % ( + return "%s: num_samples=%d, sample_rate=%d, duration=%.2fsec, rms=%.2fdB" % ( type(self), self.num_samples, self.sample_rate, @@ -75,18 +75,18 @@ def _convert_samples_to_float32(samples): @classmethod def from_file( - cls, filename, target_sr=None, int_values=False, offset=0, duration=0, trim=False, + cls, audio_file, target_sr=None, int_values=False, offset=0, duration=0, trim=False, ): """ Load a file supported by librosa and return as an AudioSegment. - :param filename: path of file to load + :param audio_file: path of file to load :param target_sr: the desired sample rate :param int_values: if true, load samples as 32-bit integers :param offset: offset in seconds when loading audio :param duration: duration in seconds when loading audio :return: numpy array of samples """ - with sf.SoundFile(filename, 'r') as f: + with sf.SoundFile(audio_file, 'r') as f: dtype = 'int32' if int_values else 'float32' sample_rate = f.samplerate if offset > 0: @@ -100,11 +100,13 @@ def from_file( return cls(samples, sample_rate, target_sr=target_sr, trim=trim) @classmethod - def segment_from_file(cls, filename, target_sr=None, n_segments=0, trim=False): - """Grabs n_segments number of samples from filename randomly from the + def segment_from_file(cls, audio_file, target_sr=None, n_segments=0, trim=False): + """Grabs n_segments number of samples from audio_file randomly from the file as opposed to at a specified offset. + + Note that audio_file can be either the file path, or a file-like object. 
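# --- Illustrative sketch (not part of the patch) --------------------------------
# register_perturbation() in perturb.py above lets user code extend the
# perturbation_types registry. The tiny perturbation below (polarity inversion) is
# purely hypothetical and only shows the registration pattern.
from nemo.collections.asr.parts.perturb import Perturbation, register_perturbation


class PolarityInvertPerturbation(Perturbation):
    def perturb(self, data):
        data._samples = -data._samples


register_perturbation('polarity_invert', PolarityInvertPerturbation)
# Registering the same name twice raises KeyError, as coded in register_perturbation.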
""" - with sf.SoundFile(filename, 'r') as f: + with sf.SoundFile(audio_file, 'r') as f: sample_rate = f.samplerate if n_segments > 0 and len(f) > n_segments: max_audio_start = len(f) - n_segments @@ -168,15 +170,15 @@ def subsegment(self, start_time=None, end_time=None): if end_time < 0.0: end_time = self.duration + end_time if start_time < 0.0: - raise ValueError("The slice start position (%f s) is out of " "bounds." % start_time) + raise ValueError("The slice start position (%f s) is out of bounds." % start_time) if end_time < 0.0: raise ValueError("The slice end position (%f s) is out of bounds." % end_time) if start_time > end_time: raise ValueError( - "The slice start position (%f s) is later than " "the end position (%f s)." % (start_time, end_time) + "The slice start position (%f s) is later than the end position (%f s)." % (start_time, end_time) ) if end_time > self.duration: - raise ValueError("The slice end position (%f s) is out of bounds " "(> %f s)" % (end_time, self.duration)) + raise ValueError("The slice end position (%f s) is out of bounds (> %f s)" % (end_time, self.duration)) start_sample = int(round(start_time * self._sample_rate)) end_sample = int(round(end_time * self._sample_rate)) self._samples = self._samples[start_sample:end_sample] diff --git a/nemo/collections/asr/parts/spectr_augment.py b/nemo/collections/asr/parts/spectr_augment.py index ff733cc2f352..a2f4bd2f587a 100755 --- a/nemo/collections/asr/parts/spectr_augment.py +++ b/nemo/collections/asr/parts/spectr_augment.py @@ -14,7 +14,12 @@ class SpecAugment(nn.Module): freq_masks - how many frequency segments should be cut time_masks - how many time segments should be cut freq_width - maximum number of frequencies to be cut in one segment - time_width - maximum number of time steps to be cut in one segment + time_width - maximum number of time steps to be cut in one segment. + Can be a positive integer or a float value in the range [0, 1]. + If positive integer value, defines maximum number of time steps + to be cut in one segment. + If a float value, defines maximum percentage of timesteps that + are cut adaptively. 
""" def __init__( @@ -30,10 +35,23 @@ def __init__( self.freq_width = freq_width self.time_width = time_width + if isinstance(time_width, int): + self.adaptive_temporal_width = False + else: + if time_width > 1.0 or time_width < 0.0: + raise ValueError('If `time_width` is a float value, must be in range [0, 1]') + + self.adaptive_temporal_width = True + @torch.no_grad() def forward(self, x): sh = x.shape + if self.adaptive_temporal_width: + time_width = max(1, int(sh[2] * self.time_width)) + else: + time_width = self.time_width + mask = torch.zeros(x.shape).byte() for idx in range(sh[0]): @@ -45,9 +63,9 @@ def forward(self, x): mask[idx, x_left : x_left + w, :] = 1 for i in range(self.time_masks): - y_left = int(self._rng.uniform(0, sh[2] - self.time_width)) + y_left = int(self._rng.uniform(0, sh[2] - time_width)) - w = int(self._rng.uniform(0, self.time_width)) + w = int(self._rng.uniform(0, time_width)) mask[idx, :, y_left : y_left + w] = 1 diff --git a/nemo/collections/nlp/README.md b/nemo/collections/nlp/README.md index 75216d906b4d..a105455fad7c 100644 --- a/nemo/collections/nlp/README.md +++ b/nemo/collections/nlp/README.md @@ -1,11 +1,16 @@ NeMo NLP Collection: Neural Modules for Natural Language Processing =================================================================== -**Models** - - * Neural Machine Translation - * BERT - * Intent Classification and Slot filling - * Named Entity Recognition +**Supported Tasks and Models** + * Intent Detection and Slot Filling + * Text Classification + * State Tracking for Task-oriented Dialogue Systems + * Language Modelling + * Neural Machine Translation + * Question Answering + * Name Entity Recognition (NER) + * Punctuation and Capitalization + * GLUE Benchmark + * ASR Postprocessing with BERT See the documentation and tutorials for ``nemo_nlp`` [here](https://nvidia.github.io/NeMo/nlp/intro.html). 
\ No newline at end of file diff --git a/nemo/collections/nlp/callbacks/joint_intent_slot_callback.py b/nemo/collections/nlp/callbacks/joint_intent_slot_callback.py index 4ca9e5c25ff1..2cb173933745 100644 --- a/nemo/collections/nlp/callbacks/joint_intent_slot_callback.py +++ b/nemo/collections/nlp/callbacks/joint_intent_slot_callback.py @@ -17,15 +17,20 @@ import random import numpy as np -from sklearn.metrics import classification_report from nemo import logging -from nemo.collections.nlp.utils.callback_utils import list2str, plot_confusion_matrix, tensor2list +from nemo.collections.nlp.utils.callback_utils import ( + get_classification_report, + get_f1_scores, + list2str, + plot_confusion_matrix, + tensor2list, +) __all__ = ['eval_iter_callback', 'eval_epochs_done_callback'] -def eval_iter_callback(tensors, global_vars, eval_data_layer): +def eval_iter_callback(tensors, global_vars): if "all_intent_preds" not in global_vars.keys(): global_vars["all_intent_preds"] = [] if "all_intent_labels" not in global_vars.keys(): @@ -75,7 +80,7 @@ def eval_iter_callback(tensors, global_vars, eval_data_layer): global_vars["all_subtokens_mask"].extend(all_subtokens_mask) -def eval_epochs_done_callback(global_vars, graph_fold): +def eval_epochs_done_callback(global_vars, intents_label_ids, slots_label_ids, graph_fold=None, normalize_cm=True): intent_labels = np.asarray(global_vars['all_intent_labels']) intent_preds = np.asarray(global_vars['all_intent_preds']) @@ -96,23 +101,31 @@ def eval_epochs_done_callback(global_vars, graph_fold): logging.info("Sampled s_preds: [%s]" % list2str(slot_preds[i : i + sample_size])) logging.info("Sampled slots: [%s]" % list2str(slot_labels[i : i + sample_size])) - plot_confusion_matrix(intent_labels, intent_preds, graph_fold) - - logging.info('Intent prediction results') - correct_preds = sum(intent_labels == intent_preds) - intent_accuracy = correct_preds / intent_labels.shape[0] - logging.info(f'Intent accuracy: {intent_accuracy}') - logging.info( - f'Classification report:\n \ - {classification_report(intent_labels, intent_preds)}' - ) - - logging.info('Slot prediction results') - slot_accuracy = sum(slot_labels == slot_preds) / slot_labels.shape[0] - logging.info(f'Slot accuracy: {slot_accuracy}') - logging.info( - f'Classification report:\n \ - {classification_report(slot_labels[:-2], slot_preds[:-2])}' - ) + if graph_fold: + # calculate, plot and save the confusion_matrix + plot_confusion_matrix( + intent_labels, intent_preds, graph_fold, intents_label_ids, normalize=normalize_cm, prefix='Intent' + ) + plot_confusion_matrix( + slot_labels, slot_preds, graph_fold, slots_label_ids, normalize=normalize_cm, prefix='Slot' + ) + + logging.info('Slot Prediction Results:') + slot_accuracy = np.mean(slot_labels == slot_preds) + logging.info(f'Slot Accuracy: {slot_accuracy}') + f1_scores = get_f1_scores(slot_labels, slot_preds, average_modes=['weighted', 'macro', 'micro']) + for k, v in f1_scores.items(): + logging.info(f'{k}: {v}') + + logging.info(f'\n {get_classification_report(slot_labels, slot_preds, label_ids=slots_label_ids)}') + + logging.info('Intent Prediction Results:') + intent_accuracy = np.mean(intent_labels == intent_preds) + logging.info(f'Intent Accuracy: {intent_accuracy}') + f1_scores = get_f1_scores(intent_labels, intent_preds, average_modes=['weighted', 'macro', 'micro']) + for k, v in f1_scores.items(): + logging.info(f'{k}: {v}') + + logging.info(f'\n {get_classification_report(intent_labels, intent_preds, label_ids=intents_label_ids)}') return 
dict({'intent_accuracy': intent_accuracy, 'slot_accuracy': slot_accuracy}) diff --git a/nemo/collections/nlp/callbacks/lm_bert_callback.py b/nemo/collections/nlp/callbacks/lm_bert_callback.py index 33f20df37079..1024ff71c976 100644 --- a/nemo/collections/nlp/callbacks/lm_bert_callback.py +++ b/nemo/collections/nlp/callbacks/lm_bert_callback.py @@ -22,25 +22,33 @@ def eval_iter_callback(tensors, global_vars): - if "dev_mlm_loss" not in global_vars.keys(): - global_vars["dev_mlm_loss"] = [] - if "dev_nsp_loss" not in global_vars.keys(): - global_vars["dev_nsp_loss"] = [] - keys = list(tensors.keys()) - # TODO: referring to these by name here is error-prone - for dev_mlm_loss in tensors[keys[1]]: - global_vars["dev_mlm_loss"].append(dev_mlm_loss.item()) - if len(keys) > 2: - for dev_nsp_loss in tensors[keys[2]]: - global_vars["dev_nsp_loss"].append(dev_nsp_loss.item()) + for kv, v in tensors.items(): + + if 'SmoothedCrossEntropyLoss' in kv: + if "dev_mlm_loss" not in global_vars.keys(): + global_vars["dev_mlm_loss"] = [] + for dev_mlm_loss in v: + global_vars["dev_mlm_loss"].append(dev_mlm_loss.item()) + if 'CrossEntropyLossNM' in kv: + if "dev_nsp_loss" not in global_vars.keys(): + global_vars["dev_nsp_loss"] = [] + for dev_nsp_loss in v: + global_vars["dev_nsp_loss"].append(dev_nsp_loss.item()) + if 'LossAggregatorNM' in kv: + if "dev_loss" not in global_vars.keys(): + global_vars["dev_loss"] = [] + for dev_loss in v: + global_vars["dev_loss"].append(dev_loss.item()) def eval_epochs_done_callback(global_vars): + res = {} if 'dev_mlm_loss' in global_vars: mlm_loss = np.mean(global_vars["dev_mlm_loss"]) logging.info("Dev MLM perplexity: {0}".format(np.round(np.exp(mlm_loss), 3))) global_vars["dev_mlm_loss"] = [] + res["Dev MLM loss"] = mlm_loss else: mlm_loss = -123.0 @@ -48,7 +56,16 @@ def eval_epochs_done_callback(global_vars): nsp_loss = np.mean(global_vars["dev_nsp_loss"]) logging.info("Dev NSP perplexity: {0}".format(np.round(np.exp(nsp_loss), 3))) global_vars["dev_nsp_loss"] = [] + res["Dev NSP loss"] = nsp_loss + else: + nsp_loss = -123.0 + + if 'dev_loss' in global_vars: + total_loss = np.mean(global_vars["dev_loss"]) + logging.info("Dev perplexity: {0}".format(np.round(np.exp(total_loss), 3))) + global_vars["dev_loss"] = [] + res["Dev loss"] = total_loss else: nsp_loss = -123.0 - return dict({"Dev MLM loss": mlm_loss, "Dev NSP loss": nsp_loss}) + return res diff --git a/nemo/collections/nlp/callbacks/punctuation_capitalization_callback.py b/nemo/collections/nlp/callbacks/punctuation_capitalization_callback.py index dd8a1a5cd5c4..b11ef5ddb6f5 100644 --- a/nemo/collections/nlp/callbacks/punctuation_capitalization_callback.py +++ b/nemo/collections/nlp/callbacks/punctuation_capitalization_callback.py @@ -17,10 +17,14 @@ import random import numpy as np -from sklearn.metrics import classification_report from nemo import logging -from nemo.collections.nlp.utils.callback_utils import list2str, plot_confusion_matrix, tensor2list +from nemo.collections.nlp.utils.callback_utils import ( + get_classification_report, + list2str, + plot_confusion_matrix, + tensor2list, +) __all__ = ['eval_iter_callback', 'eval_epochs_done_callback'] @@ -112,11 +116,8 @@ def _eval_epochs_done_callback(task_name, global_vars, label_ids, graph_fold=Non logging.info("Sampled preds: [%s]" % list2str(preds[i : i + sample_size])) logging.info("Sampled labels: [%s]" % list2str(labels[i : i + sample_size])) - # remove labels from label_ids that don't appear in the dev set - used_labels = set(labels) | set(preds) - 
label_ids = {k: label_ids[k] for k, v in label_ids.items() if v in used_labels} - - logging.info(classification_report(labels, preds, target_names=label_ids)) + classification_report = get_classification_report(labels, preds, label_ids) + logging.info(classification_report) # calculate and plot confusion_matrix if graph_fold: diff --git a/nemo/collections/nlp/callbacks/qa_squad_callback.py b/nemo/collections/nlp/callbacks/qa_squad_callback.py index 3e6f7ae10a3c..5d2f07516823 100644 --- a/nemo/collections/nlp/callbacks/qa_squad_callback.py +++ b/nemo/collections/nlp/callbacks/qa_squad_callback.py @@ -56,7 +56,7 @@ def eval_epochs_done_callback( version_2_with_negative, null_score_diff_threshold, ): - exact_match, f1, _ = eval_data_layer.dataset.evaluate( + exact_match, f1, _, _ = eval_data_layer.dataset.evaluate( unique_ids=global_vars["eval_unique_ids"], start_logits=global_vars["eval_start_logits"], end_logits=global_vars["eval_end_logits"], diff --git a/nemo/collections/nlp/callbacks/sgd_callback.py b/nemo/collections/nlp/callbacks/sgd_callback.py new file mode 100644 index 000000000000..607b84ca3043 --- /dev/null +++ b/nemo/collections/nlp/callbacks/sgd_callback.py @@ -0,0 +1,216 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# Copyright 2019 The Google Research Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================= + +""" +This file contains code artifacts adapted from the original implementation: +https://github.com/google-research/google-research/blob/master/schema_guided_dst +""" + +import json +import os + +import torch + +import nemo.collections.nlp.data.datasets.sgd_dataset.prediction_utils as pred_utils +from nemo import logging +from nemo.collections.nlp.data.datasets.sgd_dataset.data_processor import SGDDataProcessor +from nemo.collections.nlp.data.datasets.sgd_dataset.evaluate import ( + ALL_SERVICES, + PER_FRAME_OUTPUT_FILENAME, + SEEN_SERVICES, + UNSEEN_SERVICES, + get_dataset_as_dict, + get_in_domain_services, + get_metrics, +) + +__all__ = ['eval_iter_callback', 'eval_epochs_done_callback'] + + +def tensor2list(tensor): + return tensor.detach().cpu().tolist() + + +def get_str_example_id(eval_dataset, ids_to_service_names_dict, example_id_num): + def format_turn_id(ex_id_num): + dialog_id_1, dialog_id_2, turn_id, service_id = ex_id_num + return "{}-{}_{:05d}-{:02d}-{}".format( + eval_dataset, dialog_id_1, dialog_id_2, turn_id, ids_to_service_names_dict[service_id] + ) + + return list(map(format_turn_id, tensor2list(example_id_num))) + + +def eval_iter_callback(tensors, global_vars, schema_processor, eval_dataset): + if 'predictions' not in global_vars: + global_vars['predictions'] = [] + + output = {} + for k, v in tensors.items(): + ind = k.find('~~~') + if ind != -1: + output[k[:ind]] = torch.cat(v) + + predictions = {} + ids_to_service_names_dict = schema_processor.get_ids_to_service_names_dict() + predictions['example_id'] = get_str_example_id(eval_dataset, ids_to_service_names_dict, output['example_id_num']) + + predictions['service_id'] = output['service_id'] + predictions['is_real_example'] = output['is_real_example'] + + # Scores are output for each intent. + # Note that the intent indices are shifted by 1 to account for NONE intent. + predictions['intent_status'] = torch.argmax(output['logit_intent_status'], -1) + + # Scores are output for each requested slot. + predictions['req_slot_status'] = torch.nn.Sigmoid()(output['logit_req_slot_status']) + + # For categorical slots, the status of each slot and the predicted value are output. + cat_slot_status_dist = torch.nn.Softmax(dim=-1)(output['logit_cat_slot_status']) + cat_slot_value_dist = torch.nn.Softmax(dim=-1)(output['logit_cat_slot_value']) + + predictions['cat_slot_status'] = torch.argmax(output['logit_cat_slot_status'], axis=-1) + predictions['cat_slot_status_p'] = torch.max(cat_slot_status_dist, axis=-1)[0] + predictions['cat_slot_value'] = torch.argmax(output['logit_cat_slot_value'], axis=-1) + predictions['cat_slot_value_p'] = torch.max(cat_slot_value_dist, axis=-1)[0] + + # For non-categorical slots, the status of each slot and the indices for spans are output. + noncat_slot_status_dist = torch.nn.Softmax(dim=-1)(output['logit_noncat_slot_status']) + + predictions['noncat_slot_status'] = torch.argmax(output['logit_noncat_slot_status'], axis=-1) + predictions['noncat_slot_status_p'] = torch.max(noncat_slot_status_dist, axis=-1)[0] + + softmax = torch.nn.Softmax(dim=-1) + start_scores = softmax(output['logit_noncat_slot_start']) + end_scores = softmax(output['logit_noncat_slot_end']) + + batch_size, max_num_noncat_slots, max_num_tokens = end_scores.size() + # Find the span with the maximum sum of scores for start and end indices. 
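# --- Illustrative aside (not part of the patch) ----------------------------------
# The block below scores every (start, end) pair as start_score + end_score and
# masks out pairs with start > end before taking the argmax. The same idea on a toy
# single-slot example over 4 tokens, with made-up scores:
import torch

start = torch.tensor([0.1, 0.6, 0.2, 0.1])
end = torch.tensor([0.2, 0.1, 0.5, 0.2])
pair_scores = start.unsqueeze(1) + end.unsqueeze(0)          # [4, 4]; rows = start, cols = end
valid = torch.triu(torch.ones(4, 4)).bool()                  # keep start <= end only
pair_scores = torch.where(valid, pair_scores, torch.zeros_like(pair_scores))
best = int(torch.argmax(pair_scores.view(-1)))
span = (best // 4, best % 4)                                 # -> (1, 2)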
+ total_scores = torch.unsqueeze(start_scores, axis=3) + torch.unsqueeze(end_scores, axis=2) + # Mask out scores where start_index > end_index. + # device = total_scores.device + start_idx = torch.arange(max_num_tokens, device=total_scores.device).view(1, 1, -1, 1) + end_idx = torch.arange(max_num_tokens, device=total_scores.device).view(1, 1, 1, -1) + invalid_index_mask = (start_idx > end_idx).repeat(batch_size, max_num_noncat_slots, 1, 1) + total_scores = torch.where( + invalid_index_mask, + torch.zeros(total_scores.size(), device=total_scores.device, dtype=total_scores.dtype), + total_scores, + ) + max_span_index = torch.argmax(total_scores.view(-1, max_num_noncat_slots, max_num_tokens ** 2), axis=-1) + max_span_p = torch.max(total_scores.view(-1, max_num_noncat_slots, max_num_tokens ** 2), axis=-1)[0] + predictions['noncat_slot_p'] = max_span_p + + span_start_index = torch.div(max_span_index, max_num_tokens) + span_end_index = torch.fmod(max_span_index, max_num_tokens) + + predictions['noncat_slot_start'] = span_start_index + predictions['noncat_slot_end'] = span_end_index + + # Add inverse alignments. + predictions['noncat_alignment_start'] = output['start_char_idx'] + predictions['noncat_alignment_end'] = output['end_char_idx'] + + # added for debugging + predictions['cat_slot_status_GT'] = output['categorical_slot_status'] + predictions['noncat_slot_status_GT'] = output['noncategorical_slot_status'] + + global_vars['predictions'].extend(combine_predictions_in_example(predictions, batch_size)) + + +def combine_predictions_in_example(predictions, batch_size): + ''' + Combines predicted values to a single example. + ''' + examples_preds = [{} for _ in range(batch_size)] + for k, v in predictions.items(): + if k != 'example_id': + v = torch.chunk(v, batch_size) + + for i in range(batch_size): + if k == 'example_id': + examples_preds[i][k] = v[i] + else: + examples_preds[i][k] = v[i].view(-1) + return examples_preds + + +def eval_epochs_done_callback( + global_vars, + task_name, + eval_dataset, + data_dir, + prediction_dir, + state_tracker, + eval_debug, + dialogues_processor, + schema_emb_preprocessor, + joint_acc_across_turn, + no_fuzzy_match, +): + # added for debugging + in_domain_services = get_in_domain_services( + os.path.join(data_dir, eval_dataset, "schema.json"), dialogues_processor.get_seen_services("train") + ) + ############## + # we'll write predictions to file in Dstc8/SGD format during evaluation callback + prediction_dir = os.path.join(prediction_dir, 'predictions', 'pred_res_{}_{}'.format(eval_dataset, task_name)) + os.makedirs(prediction_dir, exist_ok=True) + + input_json_files = SGDDataProcessor.get_dialogue_files(data_dir, eval_dataset, task_name) + pred_utils.write_predictions_to_file( + global_vars['predictions'], + input_json_files, + prediction_dir, + schemas=schema_emb_preprocessor.schemas, + state_tracker=state_tracker, + eval_debug=eval_debug, + in_domain_services=in_domain_services, + ) + metrics = evaluate( + prediction_dir, data_dir, eval_dataset, in_domain_services, joint_acc_across_turn, no_fuzzy_match, + ) + return metrics + + +def evaluate(prediction_dir, data_dir, eval_dataset, in_domain_services, joint_acc_across_turn, no_fuzzy_match): + + with open(os.path.join(data_dir, eval_dataset, "schema.json")) as f: + eval_services = {} + list_services = json.load(f) + for service in list_services: + eval_services[service["service_name"]] = service + f.close() + + dataset_ref = get_dataset_as_dict(os.path.join(data_dir, eval_dataset, "dialogues_*.json")) + 
dataset_hyp = get_dataset_as_dict(os.path.join(prediction_dir, "*.json")) + + all_metric_aggregate, _ = get_metrics( + dataset_ref, dataset_hyp, eval_services, in_domain_services, joint_acc_across_turn, no_fuzzy_match + ) + if SEEN_SERVICES in all_metric_aggregate: + logging.info(f'Dialog metrics for {SEEN_SERVICES} : {sorted(all_metric_aggregate[SEEN_SERVICES].items())}') + if UNSEEN_SERVICES in all_metric_aggregate: + logging.info(f'Dialog metrics for {UNSEEN_SERVICES}: {sorted(all_metric_aggregate[UNSEEN_SERVICES].items())}') + if ALL_SERVICES in all_metric_aggregate: + logging.info(f'Dialog metrics for {ALL_SERVICES} : {sorted(all_metric_aggregate[ALL_SERVICES].items())}') + + # Write the per-frame metrics values with the corrresponding dialogue frames. + with open(os.path.join(prediction_dir, PER_FRAME_OUTPUT_FILENAME), "w") as f: + json.dump(dataset_hyp, f, indent=2, separators=(",", ": ")) + f.close() + return all_metric_aggregate[ALL_SERVICES] diff --git a/nemo/collections/nlp/callbacks/state_tracking_trade_callback.py b/nemo/collections/nlp/callbacks/state_tracking_trade_callback.py index f0347ad18c19..8cedb8a1fa8d 100644 --- a/nemo/collections/nlp/callbacks/state_tracking_trade_callback.py +++ b/nemo/collections/nlp/callbacks/state_tracking_trade_callback.py @@ -18,6 +18,7 @@ import torch from nemo import logging +from nemo.collections.nlp.utils.callback_utils import tensor2list, tensor2numpy __all__ = ['eval_iter_callback', 'eval_epochs_done_callback'] @@ -33,31 +34,40 @@ def eval_iter_callback(tensors, global_vars, data_desc): if 'gating_preds' not in global_vars: global_vars['gating_preds'] = [] - for kv, v in tensors.items(): - if kv.startswith('loss'): - loss_numpy = v[0].cpu().numpy() - global_vars['loss'].append(loss_numpy) - if kv.startswith('point_outputs'): - point_outputs = v[0] - if kv.startswith('gate_outputs'): - gate_outputs = v[0] - if kv.startswith('gating_labels'): - gating_labels = v[0].cpu().numpy() - global_vars['gating_labels'].extend(gating_labels) - if kv.startswith('tgt_ids'): - tgt_ids = v[0] - - point_outputs_max = torch.argmax(point_outputs, dim=-1) - mask_paddings = tgt_ids == data_desc.vocab.pad_id - comp_res = (point_outputs_max == tgt_ids) | mask_paddings - comp_res = torch.all(comp_res, axis=-1, keepdims=False) - - global_vars['comp_res'].extend(comp_res.cpu().numpy()) - global_vars['gating_preds'].extend(torch.argmax(gate_outputs, axis=-1).cpu().numpy()) + point_outputs_max_list = [] + tgt_ids_list = [] + gate_outputs_max_list = [] + for tensor_name, values_list in tensors.items(): + if tensor_name.startswith('gating_labels'): + for values in values_list: + global_vars['gating_labels'].extend(tensor2list(values)) + elif tensor_name.startswith('point_outputs'): + for values in values_list: + p_max = torch.argmax(values, dim=-1) + point_outputs_max_list.append(tensor2numpy(p_max)) + elif tensor_name.startswith('gate_outputs'): + for values in values_list: + g_max = torch.argmax(values, axis=-1) + gate_outputs_max_list.append(tensor2numpy(g_max)) + elif tensor_name.startswith('tgt_ids'): + for values in values_list: + tgt_ids_list.append(tensor2numpy(values)) + + comp_res_list = [] + for i in range(len(point_outputs_max_list)): + mask_paddings = tgt_ids_list[i] == data_desc.vocab.pad_id + comp_res = (point_outputs_max_list[i] == tgt_ids_list[i]) | mask_paddings + comp_res = np.all(comp_res, axis=-1, keepdims=False) + comp_res_list.extend(comp_res.tolist()) + + gate_outputs_max = np.concatenate(gate_outputs_max_list, axis=0).tolist() + + 
global_vars['comp_res'].extend(comp_res_list) + global_vars['gating_preds'].extend(gate_outputs_max) def eval_epochs_done_callback(global_vars, data_desc): - joint_acc, turn_acc = evaluate_metrics( + joint_acc, slot_acc = evaluate_metrics( global_vars['comp_res'], global_vars['gating_labels'], global_vars['gating_preds'], @@ -67,7 +77,7 @@ def eval_epochs_done_callback(global_vars, data_desc): gating_comp_flatten = (np.asarray(global_vars['gating_labels']) == np.asarray(global_vars['gating_preds'])).ravel() gating_acc = np.sum(gating_comp_flatten) / len(gating_comp_flatten) - evaluation_metrics = {"Joint_Goal_Acc": joint_acc, "Turn_Acc": turn_acc, "Gate_Acc": gating_acc} + evaluation_metrics = {"Joint_Goal_Acc": joint_acc, "Slot_Acc": slot_acc, "Gate_Acc": gating_acc} logging.info(evaluation_metrics) return evaluation_metrics @@ -98,6 +108,6 @@ def evaluate_metrics(comp_res, gating_labels, gating_preds, ptr_code): if not turn_wrong: correct_turns += 1 - turn_acc = correct_slots / float(total_slots) if total_slots != 0 else 0 + slot_acc = correct_slots / float(total_slots) if total_slots != 0 else 0 joint_acc = correct_turns / float(total_turns) if total_turns != 0 else 0 - return joint_acc, turn_acc + return joint_acc, slot_acc diff --git a/nemo/collections/nlp/callbacks/text_classification_callback.py b/nemo/collections/nlp/callbacks/text_classification_callback.py index 5a1e313fdb76..bd62e6d89237 100644 --- a/nemo/collections/nlp/callbacks/text_classification_callback.py +++ b/nemo/collections/nlp/callbacks/text_classification_callback.py @@ -64,5 +64,5 @@ def eval_epochs_done_callback(global_vars, graph_fold): logging.info("Sampled preds: [%s]" % list2str(preds[i : i + sample_size])) logging.info("Sampled labels: [%s]" % list2str(labels[i : i + sample_size])) plot_confusion_matrix(labels, preds, graph_fold) - logging.info(classification_report(labels, preds)) + logging.info(classification_report(labels, preds, digits=4)) return dict({"accuracy": accuracy}) diff --git a/nemo/collections/nlp/callbacks/token_classification_callback.py b/nemo/collections/nlp/callbacks/token_classification_callback.py index 5749b59b8475..9d3ec68cca9b 100644 --- a/nemo/collections/nlp/callbacks/token_classification_callback.py +++ b/nemo/collections/nlp/callbacks/token_classification_callback.py @@ -17,10 +17,15 @@ import random import numpy as np -from sklearn.metrics import classification_report from nemo import logging -from nemo.collections.nlp.utils.callback_utils import list2str, plot_confusion_matrix, tensor2list +from nemo.collections.nlp.utils.callback_utils import ( + get_classification_report, + get_f1_scores, + list2str, + plot_confusion_matrix, + tensor2list, +) __all__ = ['eval_iter_callback', 'eval_epochs_done_callback'] @@ -57,7 +62,7 @@ def eval_iter_callback(tensors, global_vars): global_vars["all_subtokens_mask"].extend(all_subtokens_mask) -def eval_epochs_done_callback(global_vars, label_ids, graph_fold=None, none_label_id=0, normalize_cm=True): +def eval_epochs_done_callback(global_vars, label_ids, graph_fold=None, normalize_cm=True): labels = np.asarray(global_vars['all_labels']) preds = np.asarray(global_vars['all_preds']) subtokens_mask = np.asarray(global_vars['all_subtokens_mask']) > 0.5 @@ -65,9 +70,6 @@ def eval_epochs_done_callback(global_vars, label_ids, graph_fold=None, none_labe labels = labels[subtokens_mask] preds = preds[subtokens_mask] - accuracy = sum(labels == preds) / labels.shape[0] - logging.info(f'Accuracy: {accuracy}') - # print predictions and labels for a small 
random subset of data sample_size = 20 i = 0 @@ -76,11 +78,15 @@ def eval_epochs_done_callback(global_vars, label_ids, graph_fold=None, none_labe logging.info("Sampled preds: [%s]" % list2str(preds[i : i + sample_size])) logging.info("Sampled labels: [%s]" % list2str(labels[i : i + sample_size])) - # remove labels from label_ids that don't appear in the dev set - used_labels = set(labels) | set(preds) - label_ids = {k: label_ids[k] for k, v in label_ids.items() if v in used_labels} + accuracy = sum(labels == preds) / labels.shape[0] + logging.info(f'Accuracy: {accuracy}') + + f1_scores = get_f1_scores(labels, preds, average_modes=['weighted', 'macro', 'micro']) + for k, v in f1_scores.items(): + logging.info(f'{k}: {v}') - logging.info(classification_report(labels, preds, target_names=label_ids)) + classification_report = get_classification_report(labels, preds, label_ids) + logging.info(classification_report) # calculate and plot confusion_matrix if graph_fold: diff --git a/nemo/collections/nlp/data/datasets/__init__.py b/nemo/collections/nlp/data/datasets/__init__.py index 8e598e5655d3..1e31f3e115f4 100644 --- a/nemo/collections/nlp/data/datasets/__init__.py +++ b/nemo/collections/nlp/data/datasets/__init__.py @@ -14,9 +14,8 @@ # limitations under the License. # ============================================================================= -from nemo.collections.nlp.data.datasets import datasets_utils -from nemo.collections.nlp.data.datasets.glue_benchmark_dataset import GLUEDataset -from nemo.collections.nlp.data.datasets.joint_intent_slot_dataset import ( +from nemo.collections.nlp.data.datasets.glue_benchmark_dataset.glue_benchmark_dataset import GLUEDataset +from nemo.collections.nlp.data.datasets.joint_intent_slot_dataset.joint_intent_slot_dataset import ( BertJointIntentSlotDataset, BertJointIntentSlotInferDataset, ) @@ -26,13 +25,18 @@ ) from nemo.collections.nlp.data.datasets.lm_transformer_dataset import LanguageModelingDataset from nemo.collections.nlp.data.datasets.machine_translation_dataset import TranslationDataset +from nemo.collections.nlp.data.datasets.multiwoz_dataset import * from nemo.collections.nlp.data.datasets.punctuation_capitalization_dataset import ( BertPunctuationCapitalizationDataset, BertPunctuationCapitalizationInferDataset, ) -from nemo.collections.nlp.data.datasets.qa_squad_dataset import SquadDataset -from nemo.collections.nlp.data.datasets.state_tracking_trade_dataset import * -from nemo.collections.nlp.data.datasets.text_classification_dataset import BertTextClassificationDataset +from nemo.collections.nlp.data.datasets.qa_squad_dataset.qa_squad_dataset import SquadDataset +from nemo.collections.nlp.data.datasets.sgd_dataset.schema_embedding_dataset import SchemaEmbeddingDataset +from nemo.collections.nlp.data.datasets.sgd_dataset.sgd_dataset import SGDDataset +from nemo.collections.nlp.data.datasets.text_classification import ( + BertTextClassificationDataset, + TextClassificationDataDesc, +) from nemo.collections.nlp.data.datasets.token_classification_dataset import ( BertTokenClassificationDataset, BertTokenClassificationInferDataset, diff --git a/nemo/collections/nlp/data/datasets/datasets_utils.py b/nemo/collections/nlp/data/datasets/datasets_utils.py deleted file mode 100644 index 8f4e0640245d..000000000000 --- a/nemo/collections/nlp/data/datasets/datasets_utils.py +++ /dev/null @@ -1,990 +0,0 @@ -# ============================================================================= -# Copyright 2020 NVIDIA. All Rights Reserved. 
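# --- Illustrative sketch (not part of the patch) --------------------------------
# Calling pattern of the callback utilities used above in place of the direct
# sklearn imports. The toy labels, predictions and label_ids mapping are made up;
# only the argument shapes mirror how the token classification callback calls them.
import numpy as np

from nemo.collections.nlp.utils.callback_utils import get_classification_report, get_f1_scores

labels = np.array([0, 1, 1, 0, 2])
preds = np.array([0, 1, 0, 0, 2])
label_ids = {'O': 0, 'B-PER': 1, 'B-LOC': 2}

for name, score in get_f1_scores(labels, preds, average_modes=['weighted', 'macro', 'micro']).items():
    print(name, score)

print(get_classification_report(labels, preds, label_ids))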
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================= - -import csv -import glob -import json -import os -import random -import re -import shutil -import string -import subprocess -from collections import Counter - -import numpy as np -from tqdm import tqdm - -from nemo import logging -from nemo.collections.nlp.utils.callback_utils import list2str -from nemo.collections.nlp.utils.common_nlp_utils import ( - get_vocab, - ids2text, - if_exist, - write_vocab, - write_vocab_in_order, -) - -__all__ = [ - 'get_label_stats', - 'process_sst_2', - 'process_imdb', - 'process_thucnews', - 'process_nlu', - 'process_twitter_airline', - 'process_atis', - 'process_jarvis_datasets', - 'process_mturk', - 'process_intent_slot_mturk', - 'get_intents_mturk', - 'get_slot_labels', - 'merge', - 'get_intent_query_files_dialogflow', - 'get_intents_slots_dialogflow', - 'get_slots_dialogflow', - 'partition_data', - 'write_files', - 'process_dialogflow', - 'write_data', - 'create_dataset', - 'read_csv', - 'process_snips', - 'get_dataset', - 'partition', - 'map_entities', - 'get_entities', - 'get_data', - 'reverse_dict', - 'get_intent_labels', - 'download_wkt2', - 'normalize_answer', - 'get_tokens', -] - -DATABASE_EXISTS_TMP = '{} dataset has already been processed and stored at {}' -MODE_EXISTS_TMP = '{} mode of {} dataset has already been processed and stored at {}' - - -def get_label_stats(labels, outfile='stats.tsv'): - labels = Counter(labels) - total = sum(labels.values()) - out = open(outfile, 'w') - i = 0 - label_frequencies = labels.most_common() - for k, v in label_frequencies: - out.write(f'{k}\t{v / total}\n') - if i < 3: - logging.info(f'{i} item: {k}, {v} out of {total}, {v / total}.') - i += 1 - return total, label_frequencies - - -def process_sst_2(data_dir): - if not os.path.exists(data_dir): - link = 'https://gluebenchmark.com/tasks' - raise ValueError(f'Data not found at {data_dir}. ' f'Please download SST-2 from {link}.') - logging.info('Keep in mind that SST-2 is only available in lower case.') - return data_dir - - -def process_imdb(data_dir, uncased, modes=['train', 'test']): - if not os.path.exists(data_dir): - link = 'www.kaggle.com/iarunava/imdb-movie-reviews-dataset' - raise ValueError(f'Data not found at {data_dir}. 
' f'Please download IMDB from {link}.') - - outfold = f'{data_dir}/nemo-processed' - - if uncased: - outfold = f'{outfold}_uncased' - - if if_exist(outfold, [f'{mode}.tsv' for mode in modes]): - logging.info(DATABASE_EXISTS_TMP.format('IMDB', outfold)) - return outfold - logging.info(f'Processing IMDB dataset and store at {outfold}') - - os.makedirs(outfold, exist_ok=True) - - outfiles = {} - - for mode in modes: - outfiles[mode] = open(os.path.join(outfold, mode + '.tsv'), 'w') - outfiles[mode].write('sentence\tlabel\n') - for sent in ['neg', 'pos']: - if sent == 'neg': - label = 0 - else: - label = 1 - files = glob.glob(f'{data_dir}/{mode}/{sent}/*.txt') - for file in files: - with open(file, 'r') as f: - review = f.read().strip() - if uncased: - review = review.lower() - review = review.replace("
", "") - outfiles[mode].write(f'{review}\t{label}\n') - for mode in modes: - outfiles[mode].close() - - return outfold - - -def process_thucnews(data_dir): - modes = ['train', 'test'] - train_size = 0.8 - if not os.path.exists(data_dir): - link = 'thuctc.thunlp.org/' - raise ValueError(f'Data not found at {data_dir}. ' f'Please download THUCNews from {link}.') - - outfold = f'{data_dir}/nemo-processed-thucnews' - - if if_exist(outfold, [f'{mode}.tsv' for mode in modes]): - logging.info(DATABASE_EXISTS_TMP.format('THUCNews', outfold)) - return outfold - logging.info(f'Processing THUCNews dataset and store at {outfold}') - - os.makedirs(outfold, exist_ok=True) - - outfiles = {} - - for mode in modes: - outfiles[mode] = open(os.path.join(outfold, mode + '.tsv'), 'a+', encoding='utf-8') - outfiles[mode].write('sentence\tlabel\n') - categories = ['体育', '娱乐', '家居', '彩票', '房产', '教育', '时尚', '时政', '星座', '游戏', '社会', '科技', '股票', '财经'] - for category in categories: - label = categories.index(category) - category_files = glob.glob(f'{data_dir}/{category}/*.txt') - test_num = int(len(category_files) * (1 - train_size)) - test_files = category_files[:test_num] - train_files = category_files[test_num:] - for mode in modes: - logging.info(f'Processing {mode} data of the category {category}') - if mode == 'test': - files = test_files - else: - files = train_files - for file in tqdm(files): - with open(file, 'r', encoding='utf-8') as f: - news = f.read().strip().replace('\r', '') - news = news.replace('\n', '').replace('\t', ' ') - outfiles[mode].write(f'{news}\t{label}\n') - for mode in modes: - outfiles[mode].close() - - return outfold - - -def process_nlu(filename, uncased, modes=['train', 'test'], dataset_name='nlu-ubuntu'): - """ Dataset has to be of: - - ubuntu - - chat - - web - """ - - if not os.path.exists(filename): - link = 'https://github.com/sebischair/NLU-Evaluation-Corpora' - raise ValueError(f'Data not found at {filename}. 
' f'Please download IMDB from {link}.') - - if dataset_name == 'nlu-ubuntu': - INTENT = {'makeupdate': 1, 'setupprinter': 2, 'shutdowncomputer': 3, 'softwarerecommendation': 4, 'none': 0} - elif dataset_name == 'nlu-chat': - INTENT = {'departuretime': 0, 'findconnection': 1} - elif dataset_name == 'nlu-web': - INTENT = { - 'changepassword': 1, - 'deleteaccount': 2, - 'downloadvideo': 3, - 'exportdata': 4, - 'filterspam': 5, - 'findalternative': 6, - 'syncaccounts': 7, - 'none': 0, - } - else: - raise ValueError(f'{dataset_name}: Invalid dataset name') - - infold = filename[: filename.rfind('/')] - outfold = f'{infold}/{dataset_name}-nemo-processed' - - if uncased: - outfold = f'{outfold}_uncased' - - if if_exist(outfold, [f'{mode}.tsv' for mode in modes]): - logging.info(DATABASE_EXISTS_TMP.format(dataset_name.upper(), outfold)) - return outfold - logging.info(f'Processing data and store at {outfold}') - - os.makedirs(outfold, exist_ok=True) - - outfiles = {} - - for mode in modes: - outfiles[mode] = open(os.path.join(outfold, mode + '.tsv'), 'w') - outfiles[mode].write('sentence\tlabel\n') - - with open(filename, 'r') as f: - data = json.load(f) - - for obj in data['sentences']: - sentence = obj['text'].strip() - if uncased: - sentence = sentence.lower() - intent = obj['intent'].lower().replace(' ', '') - label = INTENT[intent] - txt = f'{sentence}\t{label}\n' - if obj['training']: - outfiles['train'].write(txt) - else: - outfiles['test'].write(txt) - for mode in modes: - outfiles[mode].close() - return outfold - - -def process_twitter_airline(filename, uncased, modes=['train', 'test']): - """ Dataset from Kaggle: - https://www.kaggle.com/crowdflower/twitter-airline-sentiment - """ - pass - - -def process_atis(infold, uncased, modes=['train', 'test'], dev_split=0): - """ MSFT's dataset, processed by Kaggle - https://www.kaggle.com/siddhadev/atis-dataset-from-ms-cntk - """ - outfold = f'{infold}/nemo-processed' - vocab = get_vocab(f'{infold}/atis.dict.vocab.csv') - - if uncased: - outfold = f'{outfold}-uncased' - - if if_exist(outfold, [f'{mode}.tsv' for mode in modes]): - logging.info(DATABASE_EXISTS_TMP.format('ATIS', outfold)) - return outfold - logging.info(f'Processing ATIS dataset and store at {outfold}') - - os.makedirs(outfold, exist_ok=True) - - outfiles = {} - - for mode in modes: - outfiles[mode] = open(os.path.join(outfold, mode + '.tsv'), 'w') - outfiles[mode].write('sentence\tlabel\n') - outfiles[mode + '_slots'] = open(f'{outfold}/{mode}_slots.tsv', 'w') - - queries = open(f'{infold}/atis.{mode}.query.csv', 'r').readlines() - intents = open(f'{infold}/atis.{mode}.intent.csv', 'r').readlines() - slots = open(f'{infold}/atis.{mode}.slots.csv', 'r').readlines() - - for i, query in enumerate(queries): - sentence = ids2text(query.strip().split()[1:-1], vocab) - outfiles[mode].write(f'{sentence}\t{intents[i].strip()}\n') - slot = ' '.join(slots[i].strip().split()[1:-1]) - outfiles[mode + '_slots'].write(slot + '\n') - - shutil.copyfile(f'{infold}/atis.dict.intent.csv', f'{outfold}/dict.intents.csv') - shutil.copyfile(f'{infold}/atis.dict.slots.csv', f'{outfold}/dict.slots.csv') - for mode in modes: - outfiles[mode].close() - - return outfold - - -def process_jarvis_datasets(infold, uncased, dataset_name, modes=['train', 'test', 'eval'], ignore_prev_intent=False): - """ process and convert Jarvis datasets into NeMo's BIO format - """ - outfold = f'{infold}/{dataset_name}-nemo-processed' - infold = f'{infold}/' - - if uncased: - outfold = f'{outfold}-uncased' - - if 
if_exist(outfold, ['dict.intents.csv', 'dict.slots.csv']): - logging.info(DATABASE_EXISTS_TMP.format(dataset_name, outfold)) - return outfold - - logging.info(f'Processing {dataset_name} dataset and store at {outfold}') - - os.makedirs(outfold, exist_ok=True) - - outfiles = {} - intents_list = {} - slots_list = {} - slots_list_all = {} - - outfiles['dict_intents'] = open(f'{outfold}/dict.intents.csv', 'w') - outfiles['dict_slots'] = open(f'{outfold}/dict.slots.csv', 'w') - - outfiles['dict_slots'].write('O\n') - slots_list["O"] = 0 - slots_list_all["O"] = 0 - - for mode in modes: - if if_exist(outfold, [f'{mode}.tsv']): - logging.info(MODE_EXISTS_TMP.format(mode, dataset_name, outfold, mode)) - continue - - if not if_exist(infold, [f'{mode}.tsv']): - logging.info(f'{mode} mode of {dataset_name}' f' is skipped as it was not found.') - continue - - outfiles[mode] = open(os.path.join(outfold, mode + '.tsv'), 'w') - outfiles[mode].write('sentence\tlabel\n') - outfiles[mode + '_slots'] = open(f'{outfold}/{mode}_slots.tsv', 'w') - - queries = open(f'{infold}/{mode}.tsv', 'r').readlines() - - for i, query in enumerate(queries): - line_splits = query.strip().split("\t") - if len(line_splits) == 3: - intent_str, slot_tags_str, sentence = line_splits - else: - intent_str, sentence = line_splits - slot_tags_str = "" - - if intent_str not in intents_list: - intents_list[intent_str] = len(intents_list) - outfiles['dict_intents'].write(f'{intent_str}\n') - - if ignore_prev_intent: - start_token = 2 - else: - start_token = 1 - sentence_cld = " ".join(sentence.strip().split()[start_token:-1]) - outfiles[mode].write(f'{sentence_cld}\t' f'{str(intents_list[intent_str])}\n') - - slot_tags_list = [] - if slot_tags_str.strip(): - slot_tags = slot_tags_str.strip().split(",") - for st in slot_tags: - if not st.strip(): - continue - [start_i, end_i, slot_name] = st.strip().split(":") - slot_tags_list.append([int(start_i), int(end_i), slot_name]) - if slot_name not in slots_list: - slots_list[slot_name] = len(slots_list) - slots_list_all[f'B-{slot_name}'] = len(slots_list_all) - slots_list_all[f'I-{slot_name}'] = len(slots_list_all) - outfiles['dict_slots'].write(f'B-{slot_name}\n') - outfiles['dict_slots'].write(f'I-{slot_name}\n') - - slot_tags_list.sort(key=lambda x: x[0]) - slots = [] - processed_index = 0 - for tag_start, tag_end, tag_str in slot_tags_list: - if tag_start > processed_index: - words_list = sentence[processed_index:tag_start].strip().split() - slots.extend([str(slots_list_all['O'])] * len(words_list)) - words_list = sentence[tag_start:tag_end].strip().split() - slots.append(str(slots_list_all[f'B-{tag_str}'])) - slots.extend([str(slots_list_all[f'I-{tag_str}'])] * (len(words_list) - 1)) - processed_index = tag_end - - if processed_index < len(sentence): - words_list = sentence[processed_index:].strip().split() - slots.extend([str(slots_list_all['O'])] * len(words_list)) - - slots = slots[1:-1] - slot = ' '.join(slots) - outfiles[mode + '_slots'].write(slot + '\n') - - outfiles[mode + '_slots'].close() - outfiles[mode].close() - - outfiles['dict_slots'].close() - outfiles['dict_intents'].close() - - return outfold - - -def process_mturk(data_dir, uncased, modes=['train', 'test'], dev_split=0.1): - if not os.path.exists(data_dir): - link = 'www.mturk.com' - raise ValueError( - f'Data not found at {data_dir}. ' f'Export your mturk data from' f'{link} and unzip at {data_dir}.' 
- ) - - outfold = f'{data_dir}/nemo-processed' - - if if_exist(outfold, [f'{mode}.tsv' for mode in modes]): - logging.info(DATABASE_EXISTS_TMP.format('mturk', outfold)) - return outfold - - logging.info(f'Processing dataset from mturk and storing at {outfold}') - - os.makedirs(outfold, exist_ok=True) - - classification_data_file = f'{data_dir}/classification.csv' - annotation_data_file = f'{data_dir}/annotation.manifest' - - if not os.path.exists(classification_data_file): - raise FileNotFoundError(f'File not found ' f'at {classification_data_file}') - - if not os.path.exists(annotation_data_file): - raise FileNotFoundError(f'File not found at {annotation_data_file}') - - utterances = [] - utterances = read_csv(classification_data_file) - - # This function assumes that the intent classification data has been - # reviewed and cleaned and only one label per utterance is present. - agreed_all, intent_names = get_intents_mturk(utterances, outfold) - - with open(annotation_data_file, 'r') as f: - slot_annotations = f.readlines() - - # This function assumes that the preprocess step would have made - # the task_name of all the annotations generic - task_name = 'retail-combined' - - # It is assumed that every utterances will have corresponding - # slot annotation information - if len(slot_annotations) < len(agreed_all): - raise ValueError(f'Every utterance must have corresponding' f'slot annotation information') - - slot_labels, intent_queries, slot_tags = process_intent_slot_mturk( - slot_annotations, agreed_all, intent_names, task_name - ) - - assert len(slot_tags) == len(intent_queries) - - dev_split = 0.1 - - train_queries, train_slots, test_queries, test_slots = partition_data(intent_queries, slot_tags, split=dev_split) - - write_files(train_queries, f'{outfold}/train.tsv') - write_files(train_slots, f'{outfold}/train_slots.tsv') - - write_files(test_queries, f'{outfold}/test.tsv') - write_files(test_slots, f'{outfold}/test_slots.tsv') - - write_files(slot_labels, f'{outfold}/dict.slots.csv') - write_files(intent_names, f'{outfold}/dict.intents.csv') - - return outfold - - -def process_intent_slot_mturk(slot_annotations, agreed_all, intent_names, task_name): - slot_tags = [] - inorder_utterances = [] - all_labels = get_slot_labels(slot_annotations, task_name) - logging.info(f'agreed_all - {len(agreed_all)}') - logging.info(f'Slot annotations - {len(slot_annotations)}') - - for annotation in slot_annotations[0:]: - an = json.loads(annotation) - utterance = an['source'] - if len(utterance) > 2 and utterance.startswith('"') and utterance.endswith('"'): - utterance = utterance[1:-1] - - if utterance in agreed_all: - entities = {} - annotated_entities = an[task_name]['annotations']['entities'] - for i, each_anno in enumerate(annotated_entities): - entities[int(each_anno['startOffset'])] = i - - lastptr = 0 - slotlist = [] - # sorting annotations by the start offset - for i in sorted(entities.keys()): - annotated_entities = an[task_name]['annotations']['entities'] - tags = annotated_entities[entities.get(i)] - untagged_words = utterance[lastptr : tags['startOffset']] - for _ in untagged_words.split(): - slotlist.append(all_labels.get('O')) - anno_words = utterance[tags['startOffset'] : tags['endOffset']] - # tagging with the IOB format. 
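# -----------------------------------------------------------------------------
# Editor's note (illustration only, not text from the deleted file): the IOB
# scheme used by these slot-filling helpers assigns B-<label> to the first
# token of an annotated span, I-<label> to the remaining tokens of the span,
# and O to every token outside any span. For a hypothetical "restaurant_name"
# slot:
#   utterance: book a table at blue bayou
#   slot tags: O    O O     O  B-restaurant_name I-restaurant_name
# -----------------------------------------------------------------------------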
- for j, _ in enumerate(anno_words.split()): - if j == 0: - b_slot = 'B-' + tags['label'] - slotlist.append(all_labels.get(b_slot)) - else: - i_slot = 'I-' + tags['label'] - slotlist.append(all_labels.get(i_slot)) - lastptr = tags['endOffset'] - - untagged_words = utterance[lastptr : len(utterance)] - for _ in untagged_words.split(): - slotlist.append(all_labels.get('O')) - - slotstr = ' '.join(slotlist) - slotstr = f'{slotstr.strip()}\n' - - slot_tags.append(slotstr) - intent_num = intent_names.get(agreed_all.get(utterance)) - query_text = f'{utterance.strip()}\t{intent_num}\n' - inorder_utterances.append(query_text) - # else: - # logging.warning(utterance) - - logging.info(f'inorder utterances - {len(inorder_utterances)}') - - return all_labels, inorder_utterances, slot_tags - - -def get_intents_mturk(utterances, outfold): - intent_names = {} - intent_count = 0 - - agreed_all = {} - - logging.info('Printing all intent_labels') - intent_dict = f'{outfold}/dict.intents.csv' - if os.path.exists(intent_dict): - with open(intent_dict, 'r') as f: - for intent_name in f.readlines(): - intent_names[intent_name.strip()] = intent_count - intent_count += 1 - logging.info(intent_names) - - for i, utterance in enumerate(utterances[1:]): - - if utterance[1] not in agreed_all: - agreed_all[utterance[0]] = utterance[1] - - if utterance[1] not in intent_names: - intent_names[utterance[1]] = intent_count - intent_count += 1 - - logging.info(f'Total number of utterance samples: {len(agreed_all)}') - - return agreed_all, intent_names - - -def get_slot_labels(slot_annotations, task_name): - slot_labels = json.loads(slot_annotations[0]) - - all_labels = {} - count = 0 - # Generating labels with the IOB format. - for label in slot_labels[task_name]['annotations']['labels']: - b_slot = 'B-' + label['label'] - i_slot = 'I-' + label['label'] - all_labels[b_slot] = str(count) - count += 1 - all_labels[i_slot] = str(count) - count += 1 - all_labels['O'] = str(count) - - return all_labels - - -def merge(data_dir, subdirs, dataset_name, modes=['train', 'test']): - outfold = f'{data_dir}/{dataset_name}' - if if_exist(outfold, [f'{mode}.tsv' for mode in modes]): - logging.info(DATABASE_EXISTS_TMP.format('SNIPS-ATIS', outfold)) - slots = get_vocab(f'{outfold}/dict.slots.csv') - none_slot = 0 - for key in slots: - if slots[key] == 'O': - none_slot = key - break - return outfold, int(none_slot) - - os.makedirs(outfold, exist_ok=True) - - data_files, slot_files = {}, {} - for mode in modes: - data_files[mode] = open(f'{outfold}/{mode}.tsv', 'w') - data_files[mode].write('sentence\tlabel\n') - slot_files[mode] = open(f'{outfold}/{mode}_slots.tsv', 'w') - - intents, slots = {}, {} - intent_shift, slot_shift = 0, 0 - none_intent, none_slot = -1, -1 - - for subdir in subdirs: - curr_intents = get_vocab(f'{data_dir}/{subdir}/dict.intents.csv') - curr_slots = get_vocab(f'{data_dir}/{subdir}/dict.slots.csv') - - for key in curr_intents: - if intent_shift > 0 and curr_intents[key] == 'O': - continue - if curr_intents[key] == 'O' and intent_shift == 0: - none_intent = int(key) - intents[int(key) + intent_shift] = curr_intents[key] - - for key in curr_slots: - if slot_shift > 0 and curr_slots[key] == 'O': - continue - if slot_shift == 0 and curr_slots[key] == 'O': - none_slot = int(key) - slots[int(key) + slot_shift] = curr_slots[key] - - for mode in modes: - with open(f'{data_dir}/{subdir}/{mode}.tsv', 'r') as f: - for line in f.readlines()[1:]: - text, label = line.strip().split('\t') - label = int(label) - if curr_intents[label] 
== 'O': - label = none_intent - else: - label = label + intent_shift - data_files[mode].write(f'{text}\t{label}\n') - - with open(f'{data_dir}/{subdir}/{mode}_slots.tsv', 'r') as f: - for line in f.readlines(): - labels = [int(label) for label in line.strip().split()] - shifted_labels = [] - for label in labels: - if curr_slots[label] == 'O': - shifted_labels.append(none_slot) - else: - shifted_labels.append(label + slot_shift) - slot_files[mode].write(list2str(shifted_labels) + '\n') - - intent_shift += len(curr_intents) - slot_shift += len(curr_slots) - - write_vocab_in_order(intents, f'{outfold}/dict.intents.csv') - write_vocab_in_order(slots, f'{outfold}/dict.slots.csv') - return outfold, none_slot - - -def get_intent_query_files_dialogflow(path): - fileslist = [] - for root, _, files in os.walk(path): - for file in files: - if '_usersays_en.json' in file: - fileslist.append(os.path.join(root, file)) - return fileslist - - -def get_intents_slots_dialogflow(files, slot_labels): - intent_names = [] - intent_queries = [] - slot_tags = [] - - for index, file in enumerate(files): - intent_names.append(os.path.basename(file).split('_usersays')[0]) - - with open(file) as json_file: - intent_data = json.load(json_file) - for query in intent_data: - query_text = "" - slots = "" - for segment in query['data']: - query_text = ''.join([query_text, segment['text']]) - if 'alias' in segment: - for _ in segment['text'].split(): - slots = ' '.join([slots, slot_labels.get(segment['alias'])]) - else: - for _ in segment['text'].split(): - slots = ' '.join([slots, slot_labels.get('O')]) - query_text = f'{query_text.strip()}\t{index}\n' - intent_queries.append(query_text) - slots = f'{slots.strip()}\n' - slot_tags.append(slots) - return intent_queries, intent_names, slot_tags - - -def get_slots_dialogflow(files): - slot_labels = {} - count = 0 - for file in files: - intent_head_file = ''.join([file.split('_usersays')[0], '.json']) - with open(intent_head_file) as json_file: - intent_meta_data = json.load(json_file) - for params in intent_meta_data['responses'][0]['parameters']: - if params['name'] not in slot_labels: - slot_labels[params['name']] = str(count) - count += 1 - slot_labels['O'] = str(count) - return slot_labels - - -def partition_data(intent_queries, slot_tags, split=0.1): - n = len(intent_queries) - n_dev = int(n * split) - dev_idx = set(random.sample(range(n), n_dev)) - dev_intents, dev_slots, train_intents, train_slots = [], [], [], [] - - dev_intents.append('sentence\tlabel\n') - train_intents.append('sentence\tlabel\n') - - for i, item in enumerate(intent_queries): - if i in dev_idx: - dev_intents.append(item) - dev_slots.append(slot_tags[i]) - else: - train_intents.append(item) - train_slots.append(slot_tags[i]) - return train_intents, train_slots, dev_intents, dev_slots - - -def write_files(data, outfile): - with open(outfile, 'w') as f: - for item in data: - item = f'{item.strip()}\n' - f.write(item) - - -def process_dialogflow(data_dir, uncased, modes=['train', 'test'], dev_split=0.1): - if not os.path.exists(data_dir): - link = 'www.dialogflow.com' - raise ValueError( - f'Data not found at {data_dir}. ' f'Export your dialogflow data from' f'{link} and unzip at {data_dir}.' - ) - - outfold = f'{data_dir}/dialogflow/nemo-processed' - - '''TO DO - check for nemo-processed directory - already exists. If exists, skip the entire creation steps below. 
''' - - os.makedirs(outfold, exist_ok=True) - - files = get_intent_query_files_dialogflow(data_dir) - - slot_labels = get_slots_dialogflow(files) - - intent_queries, intent_names, slot_tags = get_intents_slots_dialogflow(files, slot_labels) - - train_queries, train_slots, test_queries, test_slots = partition_data(intent_queries, slot_tags, split=dev_split) - - write_files(train_queries, f'{outfold}/train.tsv') - write_files(train_slots, f'{outfold}/train_slots.tsv') - - write_files(test_queries, f'{outfold}/test.tsv') - write_files(test_slots, f'{outfold}/test_slots.tsv') - - write_files(slot_labels, f'{outfold}/dict.slots.csv') - write_files(intent_names, f'{outfold}/dict.intents.csv') - - return outfold - - -def write_data(data, slot_dict, intent_dict, outfold, mode, uncased): - intent_file = open(f'{outfold}/{mode}.tsv', 'w') - intent_file.write('sentence\tlabel\n') - slot_file = open(f'{outfold}/{mode}_slots.tsv', 'w') - for tokens, slots, intent in data: - text = ' '.join(tokens) - if uncased: - text = text.lower() - intent_file.write(f'{text}\t{intent_dict[intent]}\n') - slots = [str(slot_dict[slot]) for slot in slots] - slot_file.write(' '.join(slots) + '\n') - intent_file.close() - slot_file.close() - - -def create_dataset(train, dev, slots, intents, uncased, outfold): - os.makedirs(outfold, exist_ok=True) - if 'O' in slots: - slots.remove('O') - slots = sorted(list(slots)) + ['O'] - intents = sorted(list(intents)) - slots = write_vocab(slots, f'{outfold}/dict.slots.csv') - intents = write_vocab(intents, f'{outfold}/dict.intents.csv') - write_data(train, slots, intents, outfold, 'train', uncased) - write_data(dev, slots, intents, outfold, 'test', uncased) - - -def read_csv(file_path): - rows = [] - with open(file_path, 'r') as csvfile: - read_csv = csv.reader(csvfile, delimiter=',') - for row in read_csv: - rows.append(row) - return rows - - -def process_snips(data_dir, uncased, modes=['train', 'test'], dev_split=0.1): - if not os.path.exists(data_dir): - link = 'www.github.com/snipsco/spoken-language' - '-understanding-research-datasets' - raise ValueError(f'Data not found at {data_dir}. 
' f'Resquest to download the SNIPS dataset from {link}.') - - outfold = f'{data_dir}/nemo-processed' - - if uncased: - outfold = f'{outfold}-uncased' - - exist = True - for dataset in ['light', 'speak', 'all']: - if if_exist(f'{outfold}/{dataset}', [f'{mode}.tsv' for mode in modes]): - logging.info(DATABASE_EXISTS_TMP.format('SNIPS-' + dataset.upper(), outfold)) - else: - exist = False - if exist: - return outfold - - logging.info(f'Processing SNIPS dataset and store at {outfold}') - - os.makedirs(outfold, exist_ok=True) - - speak_dir = 'smart-speaker-en-close-field' - light_dir = 'smart-lights-en-close-field' - - light_files = [f'{data_dir}/{light_dir}/dataset.json'] - speak_files = [f'{data_dir}/{speak_dir}/training_dataset.json'] - speak_files.append(f'{data_dir}/{speak_dir}/test_dataset.json') - - light_train, light_dev, light_slots, light_intents = get_dataset(light_files, dev_split) - speak_train, speak_dev, speak_slots, speak_intents = get_dataset(speak_files) - - create_dataset(light_train, light_dev, light_slots, light_intents, uncased, f'{outfold}/light') - create_dataset(speak_train, speak_dev, speak_slots, speak_intents, uncased, f'{outfold}/speak') - create_dataset( - light_train + speak_train, - light_dev + speak_dev, - light_slots | speak_slots, - light_intents | speak_intents, - uncased, - f'{outfold}/all', - ) - - return outfold - - -def get_dataset(files, dev_split=0.1): - entity2value, value2entity = get_entities(files) - data, slots, intents = get_data(files, entity2value, value2entity) - if len(data) == 1: - train, dev = partition(data[0], split=dev_split) - else: - train, dev = data[0], data[1] - return train, dev, slots, intents - - -def partition(data, split=0.1): - n = len(data) - n_dev = int(n * split) - dev_idx = set(random.sample(range(n), n_dev)) - dev, train = [], [] - - for i, item in enumerate(data): - if i in dev_idx: - dev.append(item) - else: - train.append(item) - return train, dev - - -def map_entities(entity2value, entities): - for key in entities: - if 'data' in entities[key]: - if key not in entity2value: - entity2value[key] = set([]) - - values = [] - for value in entities[key]['data']: - values.append(value['value']) - values.extend(value['synonyms']) - entity2value[key] = entity2value[key] | set(values) - - return entity2value - - -def get_entities(files): - entity2value = {} - for file in files: - with open(file, 'r') as json_file: - data = json.load(json_file) - entity2value = map_entities(entity2value, data['entities']) - - value2entity = reverse_dict(entity2value) - return entity2value, value2entity - - -def get_data(files, entity2value, value2entity): - all_data, all_slots, all_intents = [], set(['O']), set() - for file in files: - file_data = [] - with open(file, 'r') as json_file: - data = json.load(json_file) - for intent in data['intents']: - all_intents.add(intent) - utterances = data['intents'][intent]['utterances'] - for utterance in utterances: - tokens, slots = [], [] - for frag in utterance['data']: - frag_tokens = frag['text'].strip().split() - tokens.extend(frag_tokens) - if 'slot_name' not in frag: - slot = 'O' - else: - slot = frag['slot_name'] - all_slots.add(slot) - slots.extend([slot] * len(frag_tokens)) - file_data.append((tokens, slots, intent)) - all_data.append(file_data) - return all_data, all_slots, all_intents - - -def reverse_dict(entity2value): - value2entity = {} - for entity in entity2value: - for value in entity2value[entity]: - value2entity[value] = entity - return value2entity - - -def 
get_intent_labels(intent_file): - labels = {} - label = 0 - with open(intent_file, 'r') as f: - for line in f: - intent = line.strip() - labels[intent] = label - label += 1 - return labels - - -def download_wkt2(data_dir): - if os.path.exists(data_dir): - return - os.makedirs('data/lm', exist_ok=True) - logging.warning(f'Data not found at {data_dir}. ' f'Downloading wikitext-2 to data/lm') - data_dir = 'data/lm/wikitext-2' - subprocess.call('../scripts/get_wkt2.sh') - return data_dir - - -def normalize_answer(s): - """Lower text and remove punctuation, articles and extra whitespace.""" - - def remove_articles(text): - return re.sub(r'\b(a|an|the)\b', ' ', text) - - def white_space_fix(text): - return ' '.join(text.split()) - - def remove_punc(text): - exclude = set(string.punctuation) - return ''.join(ch for ch in text if ch not in exclude) - - def lower(text): - return text.lower() - - return white_space_fix(remove_articles(remove_punc(lower(s)))) - - -def get_tokens(s): - if not s: - return [] - return normalize_answer(s).split() - - -def get_stats(lengths): - lengths = np.asarray(lengths) - logging.info( - f'Min: {np.min(lengths)} | \ - Max: {np.max(lengths)} | \ - Mean: {np.mean(lengths)} | \ - Median: {np.median(lengths)}' - ) - logging.info(f'75 percentile: {np.percentile(lengths, 75)}') - logging.info(f'99 percentile: {np.percentile(lengths, 99)}') diff --git a/nemo/collections/nlp/data/datasets/datasets_utils/__init__.py b/nemo/collections/nlp/data/datasets/datasets_utils/__init__.py new file mode 100644 index 000000000000..368a6f4ddb73 --- /dev/null +++ b/nemo/collections/nlp/data/datasets/datasets_utils/__init__.py @@ -0,0 +1,18 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +from nemo.collections.nlp.data.datasets.datasets_utils.data_preprocessing import * +from nemo.collections.nlp.data.datasets.datasets_utils.datasets_processing import * diff --git a/nemo/collections/nlp/data/datasets/datasets_utils/data_preprocessing.py b/nemo/collections/nlp/data/datasets/datasets_utils/data_preprocessing.py new file mode 100644 index 000000000000..9c9d6a8a341a --- /dev/null +++ b/nemo/collections/nlp/data/datasets/datasets_utils/data_preprocessing.py @@ -0,0 +1,360 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +import csv +import json +import os +import pickle +import random +import re +import string +from collections import Counter + +import numpy as np + +from nemo import logging + +__all__ = [ + 'get_label_stats', + 'partition_data', + 'write_files', + 'write_data', + 'create_dataset', + 'read_csv', + 'get_dataset', + 'partition', + 'map_entities', + 'get_entities', + 'get_data', + 'reverse_dict', + 'get_intent_labels', + 'get_stats', + 'DATABASE_EXISTS_TMP', + 'MODE_EXISTS_TMP', + 'is_whitespace', + 'write_vocab', + 'if_exist', + 'remove_punctuation_from_sentence', + 'dataset_to_ids', + 'get_freq_weights', + 'fill_class_weights', + 'calc_class_weights', +] + +DATABASE_EXISTS_TMP = '{} dataset has already been processed and stored at {}' +MODE_EXISTS_TMP = '{} mode of {} dataset has already been processed and stored at {}' + + +def get_label_stats(labels, outfile='stats.tsv'): + ''' + + Args: + labels: list of all labels + outfile: path to the file where to save label stats + + Returns: + total (int): total number of labels + label_frequencies (list of tuples): each tuple represent (label, label frequency) + ''' + labels = Counter(labels) + total = sum(labels.values()) + out = open(outfile, 'w') + i = 0 + freq_dict = {} + label_frequencies = labels.most_common() + for k, v in label_frequencies: + out.write(f'{k}\t\t{round(v/total,5)}\t\t{v}\n') + if i < 3: + logging.info(f'{i} item: {k}, {v} out of {total}, {v / total}.') + i += 1 + freq_dict[k] = v + + return total, freq_dict, max(labels.keys()) + + +def partition_data(intent_queries, slot_tags, split=0.1): + n = len(intent_queries) + n_dev = int(n * split) + dev_idx = set(random.sample(range(n), n_dev)) + dev_intents, dev_slots, train_intents, train_slots = [], [], [], [] + + dev_intents.append('sentence\tlabel\n') + train_intents.append('sentence\tlabel\n') + + for i, item in enumerate(intent_queries): + if i in dev_idx: + dev_intents.append(item) + dev_slots.append(slot_tags[i]) + else: + train_intents.append(item) + train_slots.append(slot_tags[i]) + return train_intents, train_slots, dev_intents, dev_slots + + +def write_files(data, outfile): + with open(outfile, 'w') as f: + for item in data: + item = f'{item.strip()}\n' + f.write(item) + + +def write_data(data, slot_dict, intent_dict, outfold, mode, uncased): + intent_file = open(f'{outfold}/{mode}.tsv', 'w') + intent_file.write('sentence\tlabel\n') + slot_file = open(f'{outfold}/{mode}_slots.tsv', 'w') + for tokens, slots, intent in data: + text = ' '.join(tokens) + if uncased: + text = text.lower() + intent_file.write(f'{text}\t{intent_dict[intent]}\n') + slots = [str(slot_dict[slot]) for slot in slots] + slot_file.write(' '.join(slots) + '\n') + intent_file.close() + slot_file.close() + + +def create_dataset(train, dev, slots, intents, uncased, outfold): + os.makedirs(outfold, exist_ok=True) + if 'O' in slots: + slots.remove('O') + slots = sorted(list(slots)) + ['O'] + intents = sorted(list(intents)) + slots = write_vocab(slots, f'{outfold}/dict.slots.csv') + intents = write_vocab(intents, f'{outfold}/dict.intents.csv') + write_data(train, slots, intents, outfold, 'train', uncased) + write_data(dev, slots, intents, outfold, 'test', uncased) + + +def read_csv(file_path): + rows = [] + with open(file_path, 'r') as csvfile: + read_csv = csv.reader(csvfile, delimiter=',') + for row in read_csv: + 
rows.append(row) + return rows + + +def get_dataset(files, dev_split=0.1): + # entity2value, value2entity = get_entities(files) + data, slots, intents = get_data(files) + if len(data) == 1: + train, dev = partition(data[0], split=dev_split) + else: + train, dev = data[0], data[1] + return train, dev, slots, intents + + +def partition(data, split=0.1): + n = len(data) + n_dev = int(n * split) + dev_idx = set(random.sample(range(n), n_dev)) + dev, train = [], [] + + for i, item in enumerate(data): + if i in dev_idx: + dev.append(item) + else: + train.append(item) + return train, dev + + +def map_entities(entity2value, entities): + for key in entities: + if 'data' in entities[key]: + if key not in entity2value: + entity2value[key] = set([]) + + values = [] + for value in entities[key]['data']: + values.append(value['value']) + values.extend(value['synonyms']) + entity2value[key] = entity2value[key] | set(values) + + return entity2value + + +def get_entities(files): + entity2value = {} + for file in files: + with open(file, 'r') as json_file: + data = json.load(json_file) + entity2value = map_entities(entity2value, data['entities']) + + value2entity = reverse_dict(entity2value) + return entity2value, value2entity + + +def get_data(files): + all_data, all_slots, all_intents = [], set(['O']), set() + for file in files: + file_data = [] + with open(file, 'r') as json_file: + data = json.load(json_file) + for intent in data['intents']: + all_intents.add(intent) + utterances = data['intents'][intent]['utterances'] + for utterance in utterances: + tokens, slots = [], [] + for frag in utterance['data']: + frag_tokens = frag['text'].strip().split() + tokens.extend(frag_tokens) + if 'slot_name' not in frag: + slot = 'O' + else: + slot = frag['slot_name'] + all_slots.add(slot) + slots.extend([slot] * len(frag_tokens)) + file_data.append((tokens, slots, intent)) + all_data.append(file_data) + return all_data, all_slots, all_intents + + +def reverse_dict(entity2value): + value2entity = {} + for entity in entity2value: + for value in entity2value[entity]: + value2entity[value] = entity + return value2entity + + +def get_intent_labels(intent_file): + labels = {} + label = 0 + with open(intent_file, 'r') as f: + for line in f: + intent = line.strip() + labels[intent] = label + label += 1 + return labels + + +def get_stats(lengths): + lengths = np.asarray(lengths) + logging.info( + f'Min: {np.min(lengths)} | \ + Max: {np.max(lengths)} | \ + Mean: {np.mean(lengths)} | \ + Median: {np.median(lengths)}' + ) + logging.info(f'75 percentile: {np.percentile(lengths, 75)}') + logging.info(f'99 percentile: {np.percentile(lengths, 99)}') + + +def is_whitespace(c): + if c == " " or c == "\t" or c == "\r" or c == "\n" or ord(c) == 0x202F: + return True + return False + + +def write_vocab(items, outfile): + vocab = {} + idx = 0 + with open(outfile, 'w') as f: + for item in items: + f.write(item + '\n') + vocab[item] = idx + idx += 1 + return vocab + + +def if_exist(outfold, files): + if not os.path.exists(outfold): + return False + for file in files: + if not os.path.exists(f'{outfold}/{file}'): + return False + return True + + +def remove_punctuation_from_sentence(sentence): + sentence = re.sub('[' + string.punctuation + ']', '', sentence) + sentence = sentence.lower() + return sentence + + +def dataset_to_ids(dataset, tokenizer, cache_ids=False, add_bos_eos=True): + """ + Reads dataset from file line by line, tokenizes each line with tokenizer, + and returns list of lists which corresponds to ids of tokenized strings. 
+ + Args: + dataset: path to dataset + tokenizer: tokenizer to convert text into ids + cache_ids: if True, ids are saved to disk as pickle file + with similar name (e.g., data.txt --> data.txt.pkl) + add_bos_eos: bool, whether to add and symbols (e.g., for NMT) + Returns: + ids: list of ids which correspond to tokenized strings of the dataset + """ + + cached_ids_dataset = dataset + str(".pkl") + if os.path.isfile(cached_ids_dataset): + logging.info("Loading cached tokenized dataset ...") + ids = pickle.load(open(cached_ids_dataset, "rb")) + else: + logging.info("Tokenizing dataset ...") + data = open(dataset, "rb").readlines() + ids = [] + for sentence in data: + sent_ids = tokenizer.text_to_ids(sentence.decode("utf-8")) + if add_bos_eos: + sent_ids = [tokenizer.bos_id] + sent_ids + [tokenizer.eos_id] + ids.append(sent_ids) + if cache_ids: + logging.info("Caching tokenized dataset ...") + pickle.dump(ids, open(cached_ids_dataset, "wb")) + return ids + + +def get_freq_weights(label_freq): + """ + Goal is to give more weight to the classes with less samples + so as to match the ones with the higher frequencies. We achieve this by + dividing the total frequency by the freq of each label to calculate its weight. + """ + total_size = 0 + for lf in label_freq.values(): + total_size += lf + weighted_slots = {label: (total_size / (len(label_freq) * freq)) for label, freq in label_freq.items()} + return weighted_slots + + +def fill_class_weights(weights, max_id=-1): + """ + Gets a dictionary of labels with their weights and creates a list with size of the labels filled with those weights. + Missing labels in the dictionary would get value 1. + + Args: + weights: dictionary of weights for labels, labels as keys and weights are their values + max_id: the largest label id in the dataset, default=-1 would consider the largest label in the weights dictionary as max_id + Returns: + weights_list: list of weights for labels + """ + if max_id < 0: + max_id = 0 + for l in weights.keys(): + max_id = max(max_id, l) + + all_weights = [1.0] * (max_id + 1) + for i in range(len(all_weights)): + if i in weights: + all_weights[i] = weights[i] + return all_weights + + +def calc_class_weights(label_freq, max_id=-1): + weights_dict = get_freq_weights(label_freq) + return fill_class_weights(weights_dict, max_id=max_id) diff --git a/nemo/collections/nlp/data/datasets/datasets_utils/datasets_processing.py b/nemo/collections/nlp/data/datasets/datasets_utils/datasets_processing.py new file mode 100644 index 000000000000..2cabbb2d13fc --- /dev/null +++ b/nemo/collections/nlp/data/datasets/datasets_utils/datasets_processing.py @@ -0,0 +1,47 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
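# -----------------------------------------------------------------------------
# Editor's sketch (not part of the PR): a small worked example for the
# class-weight helpers added to data_preprocessing.py above. The frequencies
# are invented purely to illustrate the total / (num_labels * freq) formula and
# the fill-to-max_id behaviour; a frequency dict of this shape is what the new
# get_label_stats returns alongside the total count and the largest label id.
from nemo.collections.nlp.data.datasets.datasets_utils.data_preprocessing import (
    calc_class_weights,
    fill_class_weights,
    get_freq_weights,
)

label_freq = {0: 10, 1: 30, 2: 60}              # label id -> sample count, total = 100
weights = get_freq_weights(label_freq)          # {0: 3.33, 1: 1.11, 2: 0.56}, i.e. 100 / (3 * freq)
padded = fill_class_weights(weights, max_id=4)  # [3.33, 1.11, 0.56, 1.0, 1.0]; ids 3 and 4 default to 1.0
# calc_class_weights(label_freq, max_id=4) simply chains the two calls above.
# -----------------------------------------------------------------------------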
+# ============================================================================= + +import csv + +__all__ = ['DataProcessor'] + + +class DataProcessor(object): + """Base class for data converters for sequence classification data sets.""" + + def get_train_examples(self, data_dir): + """Gets a collection of `InputExample`s for the train set.""" + raise NotImplementedError() + + def get_dev_examples(self, data_dir): + """Gets a collection of `InputExample`s for the dev set.""" + raise NotImplementedError() + + def get_labels(self): + """Gets the list of labels for this data set.""" + raise NotImplementedError() + + @classmethod + def _read_tsv(cls, input_file, quotechar=None): + """Reads a tab separated value file.""" + with open(input_file, "r", encoding="utf-8-sig") as f: + reader = csv.reader(f, delimiter="\t", quotechar=quotechar) + lines = [] + for line in reader: + # if sys.version_info[0] == 2: + # line = list(unicode(cell, 'utf-8') for cell in line) + lines.append(line) + return lines diff --git a/nemo/collections/nlp/data/datasets/glue_benchmark_dataset.py b/nemo/collections/nlp/data/datasets/glue_benchmark_dataset.py deleted file mode 100644 index 26423c3aa549..000000000000 --- a/nemo/collections/nlp/data/datasets/glue_benchmark_dataset.py +++ /dev/null @@ -1,593 +0,0 @@ -""" -Copyright 2018 The Google AI Language Team Authors and -The HuggingFace Inc. team. -Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
- -Utility functions for GLUE tasks -Some transformer of this code were adapted from the HuggingFace library at -https://github.com/huggingface/transformers -""" -import csv -import os - -import numpy as np -from torch.utils.data import Dataset - -from nemo import logging - -__all__ = ['GLUEDataset'] - - -class GLUEDataset(Dataset): - def __init__(self, data_dir, tokenizer, max_seq_length, processor, output_mode, evaluate, token_params): - self.tokenizer = tokenizer - self.label_list = processor.get_labels() - self.examples = processor.get_dev_examples(data_dir) if evaluate else processor.get_train_examples(data_dir) - self.features = convert_examples_to_features( - self.examples, self.label_list, max_seq_length, tokenizer, output_mode, **token_params - ) - - def __len__(self): - return len(self.features) - - def __getitem__(self, idx): - feature = self.features[idx] - return ( - np.array(feature.input_ids), - np.array(feature.segment_ids), - np.array(feature.input_mask, dtype=np.long), - np.array(feature.label_id), - ) - - -def convert_examples_to_features( - examples, - label_list, - max_seq_length, - tokenizer, - output_mode, - bos_token=None, - eos_token='[SEP]', - pad_token='[PAD]', - cls_token='[CLS]', - sep_token_extra=None, - cls_token_at_end=False, - cls_token_segment_id=0, - pad_token_segment_id=0, - pad_on_left=False, - mask_padding_with_zero=True, - sequence_a_segment_id=0, - sequence_b_segment_id=1, -): - """ Loads a data file into a list of `InputBatch`s - `cls_token_at_end` define the location of the CLS token: - - False (Default, BERT/XLM pattern): [CLS] + A + [SEP] + B + [SEP] - - True (XLNet/GPT pattern): A + [SEP] + B + [SEP] + [CLS] - `cls_token_segment_id` define the segment id associated to the CLS - token (0 for BERT, 2 for XLNet) - The convention in BERT is: - (a) For sequence pairs: - tokens: [CLS] is this jack ##ville ? [SEP] no it is not . [SEP] - type_ids: 0 0 0 0 0 0 0 1 1 1 1 1 1 - (b) For single sequences: - tokens: [CLS] the dog is hairy . [SEP] - type_ids: 0 0 0 0 0 0 0 - Where "type_ids" are used to indicate whether this is the first - sequence or the second sequence. The embedding vectors for `type=0` - and `type=1` were learned during pre-training and are added to the - wordpiece embedding vector (and position vector). This is - not *strictly* necessarysince the [SEP] token unambiguously separates - the sequences, but it makes it easier for the model to learn - the concept of sequences. - For classification tasks, the first vector (corresponding to [CLS]) - is used as as the "sentence vector". Note that this only makes sense - because the entire model is fine-tuned. - For NMT: - (a) For sequence pairs: - tokens: is this jack ##ville ? no it is not . - type_ids:0 0 0 0 0 0 0 1 1 1 1 1 1 1 - (b) For single sequences: - tokens: the dog is hairy . 
- type_ids: 0 0 0 0 0 0 0 - """ - label_map = {label: i for i, label in enumerate(label_list)} - - features = [] - for ex_index, example in enumerate(examples): - if ex_index % 10000 == 0: - logging.info("Writing example %d of %d" % (ex_index, len(examples))) - - tokens_a = tokenizer.text_to_tokens(example.text_a) - - tokens_b = None - if example.text_b: - tokens_b = tokenizer.text_to_tokens(example.text_b) - - special_tokens_count = 2 if eos_token else 0 - special_tokens_count += 1 if sep_token_extra else 0 - special_tokens_count += 2 if bos_token else 0 - special_tokens_count += 1 if cls_token else 0 - _truncate_seq_pair(tokens_a, tokens_b, max_seq_length - special_tokens_count) - else: - special_tokens_count = 1 if eos_token else 0 - special_tokens_count += 1 if sep_token_extra else 0 - special_tokens_count += 1 if bos_token else 0 - if len(tokens_a) > max_seq_length - special_tokens_count: - tokens_a = tokens_a[: max_seq_length - special_tokens_count] - # Add special tokens to sequence_a - tokens = tokens_a - if bos_token: - tokens = [bos_token] + tokens - if eos_token: - tokens += [eos_token] - segment_ids = [sequence_a_segment_id] * len(tokens) - - # Add sequence separator between sequences - if tokens_b and sep_token_extra: - tokens += [sep_token_extra] - segment_ids += [sequence_a_segment_id] - - # Add special tokens to sequence_b - if tokens_b: - if bos_token: - tokens += [bos_token] - segment_ids += [sequence_b_segment_id] - tokens += tokens_b - segment_ids += [sequence_b_segment_id] * (len(tokens_b)) - if eos_token: - tokens += [eos_token] - segment_ids += [sequence_b_segment_id] - - # Add classification token - for BERT models - if cls_token: - if cls_token_at_end: - tokens += [cls_token] - segment_ids += [cls_token_segment_id] - else: - tokens = [cls_token] + tokens - segment_ids = [cls_token_segment_id] + segment_ids - input_ids = tokenizer.tokens_to_ids(tokens) - - # The mask has 1 for real tokens and 0 for padding tokens. Only real - # tokens are attended to. - input_mask = [1 if mask_padding_with_zero else 0] * len(input_ids) - - # Zero-pad up to the sequence length. 
- padding_length = max_seq_length - len(input_ids) - pad_token_id = tokenizer.tokens_to_ids([pad_token])[0] - if pad_on_left: - input_ids = ([pad_token_id] * padding_length) + input_ids - input_mask = ([0 if mask_padding_with_zero else 1] * padding_length) + input_mask - segment_ids = ([pad_token_segment_id] * padding_length) + segment_ids - else: - input_ids = input_ids + ([pad_token_id] * padding_length) - input_mask = input_mask + ([0 if mask_padding_with_zero else 1] * padding_length) - segment_ids = segment_ids + ([pad_token_segment_id] * padding_length) - if len(input_ids) != max_seq_length: - raise ValueError("input_ids must be of length max_seq_length") - if len(input_mask) != max_seq_length: - raise ValueError("input_mask must be of length max_seq_length") - if len(segment_ids) != max_seq_length: - raise ValueError("segment_ids must be of length max_seq_length") - if output_mode == "classification": - label_id = label_map[example.label] - elif output_mode == "regression": - label_id = np.float32(example.label) - else: - raise KeyError(output_mode) - - if ex_index < 5: - logging.info("*** Example ***") - logging.info("guid: %s" % (example.guid)) - logging.info("tokens: %s" % " ".join(list(map(str, tokens)))) - logging.info("input_ids: %s" % " ".join(list(map(str, input_ids)))) - logging.info("input_mask: %s" % " ".join(list(map(str, input_mask)))) - logging.info("segment_ids: %s" % " ".join(list(map(str, segment_ids)))) - logging.info("label: %s (id = %d)" % (example.label, label_id)) - - features.append( - InputFeatures(input_ids=input_ids, input_mask=input_mask, segment_ids=segment_ids, label_id=label_id) - ) - return features - - -def _truncate_seq_pair(tokens_a, tokens_b, max_length): - """Truncates a sequence pair in place to the maximum length. - - This will always truncate the longer sequence one token at a time. - This makes more sense than truncating an equal percent - of tokens from each, since if one sequence is very short then each token - that's truncated likely contains more information than a longer sequence. - """ - while True: - total_length = len(tokens_a) + len(tokens_b) - if total_length <= max_length: - break - if len(tokens_a) > len(tokens_b): - tokens_a.pop() - else: - tokens_b.pop() - - -""" -Utility functions for GLUE tasks -This code was adapted from the HuggingFace library at -https://github.com/huggingface/transformers -""" - - -class InputFeatures(object): - """A single set of features of data.""" - - def __init__(self, input_ids, input_mask, segment_ids, label_id): - self.input_ids = input_ids - self.input_mask = input_mask - self.segment_ids = segment_ids - self.label_id = label_id - - -class InputExample(object): - """A single training/test example for simple sequence classification.""" - - def __init__(self, guid, text_a, text_b=None, label=None): - """Constructs a InputExample. - - Args: - guid: Unique id for the example. - text_a: string. The untokenized text of the first sequence. - For single sequence tasks, only this sequence must be specified. - text_b: (Optional) string. The untokenized text of the second - sequence. Only must be specified for sequence pair tasks. - label: (Optional) string. The label of the example. This should be - specified for train and dev examples, but not for test examples. 
- """ - self.guid = guid - self.text_a = text_a - self.text_b = text_b - self.label = label - - -class DataProcessor(object): - """Base class for data converters for sequence classification data sets.""" - - def get_train_examples(self, data_dir): - """Gets a collection of `InputExample`s for the train set.""" - raise NotImplementedError() - - def get_dev_examples(self, data_dir): - """Gets a collection of `InputExample`s for the dev set.""" - raise NotImplementedError() - - def get_labels(self): - """Gets the list of labels for this data set.""" - raise NotImplementedError() - - @classmethod - def _read_tsv(cls, input_file, quotechar=None): - """Reads a tab separated value file.""" - with open(input_file, "r", encoding="utf-8-sig") as f: - reader = csv.reader(f, delimiter="\t", quotechar=quotechar) - lines = [] - for line in reader: - # if sys.version_info[0] == 2: - # line = list(unicode(cell, 'utf-8') for cell in line) - lines.append(line) - return lines - - -class MrpcProcessor(DataProcessor): - """Processor for the MRPC data set (GLUE version).""" - - def get_train_examples(self, data_dir): - """See base class.""" - logging.info(f'LOOKING AT {os.path.join(data_dir, "train.tsv")}') - return self._create_examples(self._read_tsv(os.path.join(data_dir, "train.tsv")), "train") - - def get_dev_examples(self, data_dir): - """See base class.""" - return self._create_examples(self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev") - - def get_labels(self): - """See base class.""" - return ["0", "1"] - - def _create_examples(self, lines, set_type): - """Creates examples for the training and dev sets.""" - examples = [] - for (i, line) in enumerate(lines): - if i == 0: - continue - guid = "%s-%s" % (set_type, i) - text_a = line[3] - text_b = line[4] - label = line[0] - examples.append(InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) - return examples - - -class MnliProcessor(DataProcessor): - """Processor for the MultiNLI data set (GLUE version).""" - - def get_train_examples(self, data_dir): - """See base class.""" - return self._create_examples(self._read_tsv(os.path.join(data_dir, "train.tsv")), "train") - - def get_dev_examples(self, data_dir): - """See base class.""" - return self._create_examples(self._read_tsv(os.path.join(data_dir, "dev_matched.tsv")), "dev_matched") - - def get_labels(self): - """See base class.""" - return ["contradiction", "entailment", "neutral"] - - def _create_examples(self, lines, set_type): - """Creates examples for the training and dev sets.""" - examples = [] - for (i, line) in enumerate(lines): - if i == 0: - continue - guid = "%s-%s" % (set_type, line[0]) - text_a = line[8] - text_b = line[9] - label = line[-1] - examples.append(InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) - return examples - - -class MnliMismatchedProcessor(MnliProcessor): - """Processor for the MultiNLI Mismatched data set (GLUE version).""" - - def get_dev_examples(self, data_dir): - """See base class.""" - return self._create_examples(self._read_tsv(os.path.join(data_dir, "dev_mismatched.tsv")), "dev_matched") - - -class ColaProcessor(DataProcessor): - """Processor for the CoLA data set (GLUE version).""" - - def get_train_examples(self, data_dir): - """See base class.""" - return self._create_examples(self._read_tsv(os.path.join(data_dir, "train.tsv")), "train") - - def get_dev_examples(self, data_dir): - """See base class.""" - return self._create_examples(self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev") - - def get_labels(self): - 
"""See base class.""" - return ["0", "1"] - - def _create_examples(self, lines, set_type): - """Creates examples for the training and dev sets.""" - examples = [] - for (i, line) in enumerate(lines): - guid = "%s-%s" % (set_type, i) - text_a = line[3] - label = line[1] - examples.append(InputExample(guid=guid, text_a=text_a, text_b=None, label=label)) - return examples - - -class Sst2Processor(DataProcessor): - """Processor for the SST-2 data set (GLUE version).""" - - def get_train_examples(self, data_dir): - """See base class.""" - return self._create_examples(self._read_tsv(os.path.join(data_dir, "train.tsv")), "train") - - def get_dev_examples(self, data_dir): - """See base class.""" - return self._create_examples(self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev") - - def get_labels(self): - """See base class.""" - return ["0", "1"] - - def _create_examples(self, lines, set_type): - """Creates examples for the training and dev sets.""" - examples = [] - for (i, line) in enumerate(lines): - if i == 0: - continue - guid = "%s-%s" % (set_type, i) - text_a = line[0] - label = line[1] - examples.append(InputExample(guid=guid, text_a=text_a, text_b=None, label=label)) - return examples - - -class StsbProcessor(DataProcessor): - """Processor for the STS-B data set (GLUE version).""" - - def get_train_examples(self, data_dir): - """See base class.""" - return self._create_examples(self._read_tsv(os.path.join(data_dir, "train.tsv")), "train") - - def get_dev_examples(self, data_dir): - """See base class.""" - return self._create_examples(self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev") - - def get_labels(self): - """See base class.""" - return [None] - - def _create_examples(self, lines, set_type): - """Creates examples for the training and dev sets.""" - examples = [] - for (i, line) in enumerate(lines): - if i == 0: - continue - guid = "%s-%s" % (set_type, line[0]) - text_a = line[7] - text_b = line[8] - label = line[-1] - examples.append(InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) - return examples - - -class QqpProcessor(DataProcessor): - """Processor for the QQP data set (GLUE version).""" - - def get_train_examples(self, data_dir): - """See base class.""" - return self._create_examples(self._read_tsv(os.path.join(data_dir, "train.tsv")), "train") - - def get_dev_examples(self, data_dir): - """See base class.""" - return self._create_examples(self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev") - - def get_labels(self): - """See base class.""" - return ["0", "1"] - - def _create_examples(self, lines, set_type): - """Creates examples for the training and dev sets.""" - examples = [] - for (i, line) in enumerate(lines): - if i == 0: - continue - guid = "%s-%s" % (set_type, line[0]) - try: - text_a = line[3] - text_b = line[4] - label = line[5] - except IndexError: - continue - examples.append(InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) - return examples - - -class QnliProcessor(DataProcessor): - """Processor for the QNLI data set (GLUE version).""" - - def get_train_examples(self, data_dir): - """See base class.""" - return self._create_examples(self._read_tsv(os.path.join(data_dir, "train.tsv")), "train") - - def get_dev_examples(self, data_dir): - """See base class.""" - return self._create_examples(self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev_matched") - - def get_labels(self): - """See base class.""" - return ["entailment", "not_entailment"] - - def _create_examples(self, lines, set_type): - """Creates 
examples for the training and dev sets.""" - examples = [] - for (i, line) in enumerate(lines): - if i == 0: - continue - guid = "%s-%s" % (set_type, line[0]) - text_a = line[1] - text_b = line[2] - label = line[-1] - examples.append(InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) - return examples - - -class RteProcessor(DataProcessor): - """Processor for the RTE data set (GLUE version).""" - - def get_train_examples(self, data_dir): - """See base class.""" - return self._create_examples(self._read_tsv(os.path.join(data_dir, "train.tsv")), "train") - - def get_dev_examples(self, data_dir): - """See base class.""" - return self._create_examples(self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev") - - def get_labels(self): - """See base class.""" - return ["entailment", "not_entailment"] - - def _create_examples(self, lines, set_type): - """Creates examples for the training and dev sets.""" - examples = [] - for (i, line) in enumerate(lines): - if i == 0: - continue - guid = "%s-%s" % (set_type, line[0]) - text_a = line[1] - text_b = line[2] - label = line[-1] - examples.append(InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) - return examples - - -class WnliProcessor(DataProcessor): - """Processor for the WNLI data set (GLUE version).""" - - def get_train_examples(self, data_dir): - """See base class.""" - return self._create_examples(self._read_tsv(os.path.join(data_dir, "train.tsv")), "train") - - def get_dev_examples(self, data_dir): - """See base class.""" - return self._create_examples(self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev") - - def get_labels(self): - """See base class.""" - return ["0", "1"] - - def _create_examples(self, lines, set_type): - """Creates examples for the training and dev sets.""" - examples = [] - for (i, line) in enumerate(lines): - if i == 0: - continue - guid = "%s-%s" % (set_type, line[0]) - text_a = line[1] - text_b = line[2] - label = line[-1] - examples.append(InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) - return examples - - -processors = { - "cola": ColaProcessor, - "mnli": MnliProcessor, - "mnli-mm": MnliMismatchedProcessor, - "mrpc": MrpcProcessor, - "sst-2": Sst2Processor, - "sts-b": StsbProcessor, - "qqp": QqpProcessor, - "qnli": QnliProcessor, - "rte": RteProcessor, - "wnli": WnliProcessor, -} -output_modes = { - "cola": "classification", - "mnli": "classification", - "mnli-mm": "classification", - "mrpc": "classification", - "sst-2": "classification", - "sts-b": "regression", - "qqp": "classification", - "qnli": "classification", - "rte": "classification", - "wnli": "classification", -} -GLUE_TASKS_NUM_LABELS = { - "cola": 2, - "mnli": 3, - "mrpc": 2, - "sst-2": 2, - "sts-b": 1, - "qqp": 2, - "qnli": 2, - "rte": 2, - "wnli": 2, -} diff --git a/nemo/collections/nlp/data/datasets/glue_benchmark_dataset/__init__.py b/nemo/collections/nlp/data/datasets/glue_benchmark_dataset/__init__.py new file mode 100644 index 000000000000..d396af9c88fb --- /dev/null +++ b/nemo/collections/nlp/data/datasets/glue_benchmark_dataset/__init__.py @@ -0,0 +1,18 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +from nemo.collections.nlp.data.datasets.glue_benchmark_dataset.data_processors import * +from nemo.collections.nlp.data.datasets.glue_benchmark_dataset.glue_benchmark_dataset import * diff --git a/nemo/collections/nlp/data/datasets/glue_benchmark_dataset/data_processors.py b/nemo/collections/nlp/data/datasets/glue_benchmark_dataset/data_processors.py new file mode 100644 index 000000000000..6bf4b31d515b --- /dev/null +++ b/nemo/collections/nlp/data/datasets/glue_benchmark_dataset/data_processors.py @@ -0,0 +1,325 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# Copyright 2018 The Google AI Language Team Authors and +# The HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================= + +import os + +from nemo import logging +from nemo.collections.nlp.data.datasets.datasets_utils.datasets_processing import DataProcessor + +__all__ = [ + 'ColaProcessor', + 'MnliProcessor', + 'MnliMismatchedProcessor', + 'MrpcProcessor', + 'Sst2Processor', + 'StsbProcessor', + 'QqpProcessor', + 'QnliProcessor', + 'RteProcessor', + 'WnliProcessor', +] + + +class MrpcProcessor(DataProcessor): + """Processor for the MRPC data set (GLUE version).""" + + def get_train_examples(self, data_dir): + """See base class.""" + logging.info(f'LOOKING AT {os.path.join(data_dir, "train.tsv")}') + return self._create_examples(self._read_tsv(os.path.join(data_dir, "train.tsv")), "train") + + def get_dev_examples(self, data_dir): + """See base class.""" + return self._create_examples(self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev") + + def get_labels(self): + """See base class.""" + return ["0", "1"] + + def _create_examples(self, lines, set_type): + """Creates examples for the training and dev sets.""" + examples = [] + for (i, line) in enumerate(lines): + if i == 0: + continue + guid = "%s-%s" % (set_type, i) + text_a = line[3] + text_b = line[4] + label = line[0] + examples.append(InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) + return examples + + +class MnliProcessor(DataProcessor): + """Processor for the MultiNLI data set (GLUE version).""" + + def get_train_examples(self, data_dir): + """See base class.""" + return self._create_examples(self._read_tsv(os.path.join(data_dir, "train.tsv")), "train") + + def get_dev_examples(self, data_dir): + """See base class.""" + return self._create_examples(self._read_tsv(os.path.join(data_dir, "dev_matched.tsv")), "dev_matched") + + def get_labels(self): + """See base class.""" + return ["contradiction", "entailment", "neutral"] + + def _create_examples(self, lines, set_type): + """Creates examples for the training and dev sets.""" + examples = [] + for (i, line) in enumerate(lines): + if i == 0: + continue + guid = "%s-%s" % (set_type, line[0]) + text_a = line[8] + text_b = line[9] + label = line[-1] + examples.append(InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) + return examples + + +class MnliMismatchedProcessor(MnliProcessor): + """Processor for the MultiNLI Mismatched data set (GLUE version).""" + + def get_dev_examples(self, data_dir): + """See base class.""" + return self._create_examples(self._read_tsv(os.path.join(data_dir, "dev_mismatched.tsv")), "dev_matched") + + +class ColaProcessor(DataProcessor): + """Processor for the CoLA data set (GLUE version).""" + + def get_train_examples(self, data_dir): + """See base class.""" + return self._create_examples(self._read_tsv(os.path.join(data_dir, "train.tsv")), "train") + + def get_dev_examples(self, data_dir): + """See base class.""" + return self._create_examples(self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev") + + def get_labels(self): + """See base class.""" + return ["0", "1"] + + def _create_examples(self, lines, set_type): + """Creates examples for the training and dev sets.""" + examples = [] + for (i, line) in enumerate(lines): + guid = "%s-%s" % (set_type, i) + text_a = line[3] + label = line[1] + examples.append(InputExample(guid=guid, text_a=text_a, text_b=None, label=label)) + return examples + + +class Sst2Processor(DataProcessor): + """Processor for the SST-2 data set (GLUE version).""" + + def get_train_examples(self, data_dir): + """See base class.""" + 
return self._create_examples(self._read_tsv(os.path.join(data_dir, "train.tsv")), "train") + + def get_dev_examples(self, data_dir): + """See base class.""" + return self._create_examples(self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev") + + def get_labels(self): + """See base class.""" + return ["0", "1"] + + def _create_examples(self, lines, set_type): + """Creates examples for the training and dev sets.""" + examples = [] + for (i, line) in enumerate(lines): + if i == 0: + continue + guid = "%s-%s" % (set_type, i) + text_a = line[0] + label = line[1] + examples.append(InputExample(guid=guid, text_a=text_a, text_b=None, label=label)) + return examples + + +class StsbProcessor(DataProcessor): + """Processor for the STS-B data set (GLUE version).""" + + def get_train_examples(self, data_dir): + """See base class.""" + return self._create_examples(self._read_tsv(os.path.join(data_dir, "train.tsv")), "train") + + def get_dev_examples(self, data_dir): + """See base class.""" + return self._create_examples(self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev") + + def get_labels(self): + """See base class.""" + return [None] + + def _create_examples(self, lines, set_type): + """Creates examples for the training and dev sets.""" + examples = [] + for (i, line) in enumerate(lines): + if i == 0: + continue + guid = "%s-%s" % (set_type, line[0]) + text_a = line[7] + text_b = line[8] + label = line[-1] + examples.append(InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) + return examples + + +class QqpProcessor(DataProcessor): + """Processor for the QQP data set (GLUE version).""" + + def get_train_examples(self, data_dir): + """See base class.""" + return self._create_examples(self._read_tsv(os.path.join(data_dir, "train.tsv")), "train") + + def get_dev_examples(self, data_dir): + """See base class.""" + return self._create_examples(self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev") + + def get_labels(self): + """See base class.""" + return ["0", "1"] + + def _create_examples(self, lines, set_type): + """Creates examples for the training and dev sets.""" + examples = [] + for (i, line) in enumerate(lines): + if i == 0: + continue + guid = "%s-%s" % (set_type, line[0]) + try: + text_a = line[3] + text_b = line[4] + label = line[5] + except IndexError: + continue + examples.append(InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) + return examples + + +class QnliProcessor(DataProcessor): + """Processor for the QNLI data set (GLUE version).""" + + def get_train_examples(self, data_dir): + """See base class.""" + return self._create_examples(self._read_tsv(os.path.join(data_dir, "train.tsv")), "train") + + def get_dev_examples(self, data_dir): + """See base class.""" + return self._create_examples(self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev_matched") + + def get_labels(self): + """See base class.""" + return ["entailment", "not_entailment"] + + def _create_examples(self, lines, set_type): + """Creates examples for the training and dev sets.""" + examples = [] + for (i, line) in enumerate(lines): + if i == 0: + continue + guid = "%s-%s" % (set_type, line[0]) + text_a = line[1] + text_b = line[2] + label = line[-1] + examples.append(InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) + return examples + + +class RteProcessor(DataProcessor): + """Processor for the RTE data set (GLUE version).""" + + def get_train_examples(self, data_dir): + """See base class.""" + return 
self._create_examples(self._read_tsv(os.path.join(data_dir, "train.tsv")), "train") + + def get_dev_examples(self, data_dir): + """See base class.""" + return self._create_examples(self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev") + + def get_labels(self): + """See base class.""" + return ["entailment", "not_entailment"] + + def _create_examples(self, lines, set_type): + """Creates examples for the training and dev sets.""" + examples = [] + for (i, line) in enumerate(lines): + if i == 0: + continue + guid = "%s-%s" % (set_type, line[0]) + text_a = line[1] + text_b = line[2] + label = line[-1] + examples.append(InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) + return examples + + +class WnliProcessor(DataProcessor): + """Processor for the WNLI data set (GLUE version).""" + + def get_train_examples(self, data_dir): + """See base class.""" + return self._create_examples(self._read_tsv(os.path.join(data_dir, "train.tsv")), "train") + + def get_dev_examples(self, data_dir): + """See base class.""" + return self._create_examples(self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev") + + def get_labels(self): + """See base class.""" + return ["0", "1"] + + def _create_examples(self, lines, set_type): + """Creates examples for the training and dev sets.""" + examples = [] + for (i, line) in enumerate(lines): + if i == 0: + continue + guid = "%s-%s" % (set_type, line[0]) + text_a = line[1] + text_b = line[2] + label = line[-1] + examples.append(InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) + return examples + + +class InputExample(object): + """A single training/test example for simple sequence classification.""" + + def __init__(self, guid, text_a, text_b=None, label=None): + """Constructs a InputExample. + + Args: + guid: Unique id for the example. + text_a: string. The untokenized text of the first sequence. + For single sequence tasks, only this sequence must be specified. + text_b: (Optional) string. The untokenized text of the second + sequence. Only must be specified for sequence pair tasks. + label: (Optional) string. The label of the example. This should be + specified for train and dev examples, but not for test examples. + """ + self.guid = guid + self.text_a = text_a + self.text_b = text_b + self.label = label diff --git a/nemo/collections/nlp/data/datasets/glue_benchmark_dataset/glue_benchmark_dataset.py b/nemo/collections/nlp/data/datasets/glue_benchmark_dataset/glue_benchmark_dataset.py new file mode 100644 index 000000000000..675ef7ed0445 --- /dev/null +++ b/nemo/collections/nlp/data/datasets/glue_benchmark_dataset/glue_benchmark_dataset.py @@ -0,0 +1,307 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# Copyright 2018 The Google AI Language Team Authors and +# The HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================= + +""" +Utility functions for GLUE tasks +Some transformer of this code were adapted from the HuggingFace library at +https://github.com/huggingface/transformers +""" + +import os +import pickle + +import numpy as np +import torch +from torch.utils.data import Dataset + +from nemo import logging +from nemo.collections.nlp.data.datasets.glue_benchmark_dataset.data_processors import * + +__all__ = ['GLUEDataset', 'output_modes', 'processors'] + +processors = { + "cola": ColaProcessor, + "mnli": MnliProcessor, + "mnli-mm": MnliMismatchedProcessor, + "mrpc": MrpcProcessor, + "sst-2": Sst2Processor, + "sts-b": StsbProcessor, + "qqp": QqpProcessor, + "qnli": QnliProcessor, + "rte": RteProcessor, + "wnli": WnliProcessor, +} +output_modes = { + "cola": "classification", + "mnli": "classification", + "mnli-mm": "classification", + "mrpc": "classification", + "sst-2": "classification", + "sts-b": "regression", + "qqp": "classification", + "qnli": "classification", + "rte": "classification", + "wnli": "classification", +} +GLUE_TASKS_NUM_LABELS = { + "cola": 2, + "mnli": 3, + "mrpc": 2, + "sst-2": 2, + "sts-b": 1, + "qqp": 2, + "qnli": 2, + "rte": 2, + "wnli": 2, +} + + +class GLUEDataset(Dataset): + def __init__(self, data_dir, tokenizer, max_seq_length, processor, output_mode, evaluate, use_data_cache): + + self.tokenizer = tokenizer + self.label_list = processor.get_labels() + self.examples = processor.get_dev_examples(data_dir) if evaluate else processor.get_train_examples(data_dir) + processor_name = type(processor).__name__ + tokenizer_type = type(tokenizer.tokenizer).__name__ + vocab_size = getattr(tokenizer, "vocab_size", 0) + cached_features_file = os.path.join( + data_dir, + "cached_{}_{}_{}_{}_{}".format( + processor_name, "dev" if evaluate else "train", tokenizer_type, str(max_seq_length), str(vocab_size) + ), + ) + + if use_data_cache and os.path.exists(cached_features_file): + logging.info(f"loading from {cached_features_file}") + with open(cached_features_file, "rb") as reader: + self.features = pickle.load(reader) + else: + token_params = { + 'bos_token': None, + 'eos_token': tokenizer.eos_token, + 'pad_token': tokenizer.pad_token, + 'cls_token': tokenizer.cls_token, + 'sep_token_extra': tokenizer.eos_token if 'roberta' in tokenizer_type.lower() else None, + } + + self.features = self.convert_examples_to_features( + self.examples, self.label_list, max_seq_length, tokenizer, output_mode, **token_params + ) + if use_data_cache: + master_device = not torch.distributed.is_initialized() or torch.distributed.get_rank() == 0 + if master_device: + logging.info(f'Saving train features into {cached_features_file}') + with open(cached_features_file, "wb") as writer: + pickle.dump(self.features, writer) + + def __len__(self): + return len(self.features) + + def __getitem__(self, idx): + feature = self.features[idx] + return ( + np.array(feature.input_ids), + np.array(feature.segment_ids), + np.array(feature.input_mask, dtype=np.long), + np.array(feature.label_id), + ) + + def convert_examples_to_features( + self, + examples, + label_list, + max_seq_length, + tokenizer, + output_mode, + bos_token=None, + eos_token='[SEP]', + pad_token='[PAD]', + cls_token='[CLS]', + sep_token_extra=None, + cls_token_at_end=False, + cls_token_segment_id=0, + pad_token_segment_id=0, + pad_on_left=False, + mask_padding_with_zero=True, + sequence_a_segment_id=0, + sequence_b_segment_id=1, + ): + """ Loads a data file into a list 
of `InputBatch`s
+        `cls_token_at_end` defines the location of the CLS token:
+            - False (Default, BERT/XLM pattern): [CLS] + A + [SEP] + B + [SEP]
+            - True (XLNet/GPT pattern): A + [SEP] + B + [SEP] + [CLS]
+        `cls_token_segment_id` defines the segment id associated with the CLS
+        token (0 for BERT, 2 for XLNet)
+        The convention in BERT is:
+        (a) For sequence pairs:
+            tokens: [CLS] is this jack ##ville ? [SEP] no it is not . [SEP]
+            type_ids: 0 0 0 0 0 0 0 1 1 1 1 1 1
+        (b) For single sequences:
+            tokens: [CLS] the dog is hairy . [SEP]
+            type_ids: 0 0 0 0 0 0 0
+        Where "type_ids" are used to indicate whether this is the first
+        sequence or the second sequence. The embedding vectors for `type=0`
+        and `type=1` were learned during pre-training and are added to the
+        wordpiece embedding vector (and position vector). This is
+        not *strictly* necessary since the [SEP] token unambiguously separates
+        the sequences, but it makes it easier for the model to learn
+        the concept of sequences.
+        For classification tasks, the first vector (corresponding to [CLS])
+        is used as the "sentence vector". Note that this only makes sense
+        because the entire model is fine-tuned.
+        For NMT:
+        (a) For sequence pairs:
+            tokens: is this jack ##ville ? no it is not .
+            type_ids: 0 0 0 0 0 0 0 1 1 1 1 1 1 1
+        (b) For single sequences:
+            tokens: the dog is hairy .
+            type_ids: 0 0 0 0 0 0 0
+        """
+        label_map = {label: i for i, label in enumerate(label_list)}
+
+        features = []
+        for ex_index, example in enumerate(examples):
+            if ex_index % 10000 == 0:
+                logging.info("Writing example %d of %d" % (ex_index, len(examples)))
+
+            tokens_a = tokenizer.text_to_tokens(example.text_a)
+
+            tokens_b = None
+            if example.text_b:
+                tokens_b = tokenizer.text_to_tokens(example.text_b)
+
+                special_tokens_count = 2 if eos_token else 0
+                special_tokens_count += 1 if sep_token_extra else 0
+                special_tokens_count += 2 if bos_token else 0
+                special_tokens_count += 1 if cls_token else 0
+                self._truncate_seq_pair(tokens_a, tokens_b, max_seq_length - special_tokens_count)
+            else:
+                special_tokens_count = 1 if eos_token else 0
+                special_tokens_count += 1 if sep_token_extra else 0
+                special_tokens_count += 1 if bos_token else 0
+                if len(tokens_a) > max_seq_length - special_tokens_count:
+                    tokens_a = tokens_a[: max_seq_length - special_tokens_count]
+            # Add special tokens to sequence_a
+            tokens = tokens_a
+            if bos_token:
+                tokens = [bos_token] + tokens
+            if eos_token:
+                tokens += [eos_token]
+            segment_ids = [sequence_a_segment_id] * len(tokens)
+
+            # Add sequence separator between sequences
+            if tokens_b and sep_token_extra:
+                tokens += [sep_token_extra]
+                segment_ids += [sequence_a_segment_id]
+
+            # Add special tokens to sequence_b
+            if tokens_b:
+                if bos_token:
+                    tokens += [bos_token]
+                    segment_ids += [sequence_b_segment_id]
+                tokens += tokens_b
+                segment_ids += [sequence_b_segment_id] * (len(tokens_b))
+                if eos_token:
+                    tokens += [eos_token]
+                    segment_ids += [sequence_b_segment_id]
+
+            # Add classification token - for BERT models
+            if cls_token:
+                if cls_token_at_end:
+                    tokens += [cls_token]
+                    segment_ids += [cls_token_segment_id]
+                else:
+                    tokens = [cls_token] + tokens
+                    segment_ids = [cls_token_segment_id] + segment_ids
+            input_ids = tokenizer.tokens_to_ids(tokens)
+
+            # The mask has 1 for real tokens and 0 for padding tokens. Only real
+            # tokens are attended to.
+            input_mask = [1 if mask_padding_with_zero else 0] * len(input_ids)
+
+            # Zero-pad up to the sequence length.
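+            # For example, with max_seq_length=8 and five real tokens, padding_length
+            # is 3, so three pad_token ids are appended (or prepended when
+            # pad_on_left=True), input_mask is extended with the padding mask value
+            # (0 when mask_padding_with_zero=True), and segment_ids is extended with
+            # pad_token_segment_id, keeping all three lists exactly max_seq_length long.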
+ padding_length = max_seq_length - len(input_ids) + pad_token_id = tokenizer.tokens_to_ids([pad_token])[0] + if pad_on_left: + input_ids = ([pad_token_id] * padding_length) + input_ids + input_mask = ([0 if mask_padding_with_zero else 1] * padding_length) + input_mask + segment_ids = ([pad_token_segment_id] * padding_length) + segment_ids + else: + input_ids = input_ids + ([pad_token_id] * padding_length) + input_mask = input_mask + ([0 if mask_padding_with_zero else 1] * padding_length) + segment_ids = segment_ids + ([pad_token_segment_id] * padding_length) + if len(input_ids) != max_seq_length: + raise ValueError("input_ids must be of length max_seq_length") + if len(input_mask) != max_seq_length: + raise ValueError("input_mask must be of length max_seq_length") + if len(segment_ids) != max_seq_length: + raise ValueError("segment_ids must be of length max_seq_length") + if output_mode == "classification": + label_id = label_map[example.label] + elif output_mode == "regression": + label_id = np.float32(example.label) + else: + raise KeyError(output_mode) + + if ex_index < 5: + logging.info("*** Example ***") + logging.info("guid: %s" % (example.guid)) + logging.info("tokens: %s" % " ".join(list(map(str, tokens)))) + logging.info("input_ids: %s" % " ".join(list(map(str, input_ids)))) + logging.info("input_mask: %s" % " ".join(list(map(str, input_mask)))) + logging.info("segment_ids: %s" % " ".join(list(map(str, segment_ids)))) + logging.info("label: %s (id = %d)" % (example.label, label_id)) + + features.append( + InputFeatures(input_ids=input_ids, input_mask=input_mask, segment_ids=segment_ids, label_id=label_id) + ) + return features + + def _truncate_seq_pair(self, tokens_a, tokens_b, max_length): + """Truncates a sequence pair in place to the maximum length. + + This will always truncate the longer sequence one token at a time. + This makes more sense than truncating an equal percent + of tokens from each, since if one sequence is very short then each token + that's truncated likely contains more information than a longer sequence. + """ + while True: + total_length = len(tokens_a) + len(tokens_b) + if total_length <= max_length: + break + if len(tokens_a) > len(tokens_b): + tokens_a.pop() + else: + tokens_b.pop() + + """ + Utility functions for GLUE tasks + This code was adapted from the HuggingFace library at + https://github.com/huggingface/transformers + """ + + +class InputFeatures(object): + """A single set of features of data.""" + + def __init__(self, input_ids, input_mask, segment_ids, label_id): + self.input_ids = input_ids + self.input_mask = input_mask + self.segment_ids = segment_ids + self.label_id = label_id diff --git a/nemo/collections/nlp/data/datasets/joint_intent_slot_dataset/__init__.py b/nemo/collections/nlp/data/datasets/joint_intent_slot_dataset/__init__.py new file mode 100644 index 000000000000..2b691ff71aa3 --- /dev/null +++ b/nemo/collections/nlp/data/datasets/joint_intent_slot_dataset/__init__.py @@ -0,0 +1,19 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +from nemo.collections.nlp.data.datasets.joint_intent_slot_dataset.inference_utils import * +from nemo.collections.nlp.data.datasets.joint_intent_slot_dataset.joint_intent_slot_dataset import * +from nemo.collections.nlp.data.datasets.joint_intent_slot_dataset.joint_intent_slot_descriptor import * diff --git a/nemo/collections/nlp/data/datasets/joint_intent_slot_dataset/inference_utils.py b/nemo/collections/nlp/data/datasets/joint_intent_slot_dataset/inference_utils.py new file mode 100644 index 000000000000..a886c20739bf --- /dev/null +++ b/nemo/collections/nlp/data/datasets/joint_intent_slot_dataset/inference_utils.py @@ -0,0 +1,50 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================= + +import numpy as np + +from nemo import logging +from nemo.collections.nlp.utils import get_vocab + +__all__ = ['read_intent_slot_outputs'] + + +def read_intent_slot_outputs( + queries, intent_file, slot_file, intent_logits, slot_logits, slot_masks, intents=None, slots=None +): + intent_dict = get_vocab(intent_file) + slot_dict = get_vocab(slot_file) + pred_intents = np.argmax(intent_logits, 1) + pred_slots = np.argmax(slot_logits, axis=2) + slot_masks = slot_masks > 0.5 + for i, query in enumerate(queries): + logging.info(f'Query: {query}') + pred = pred_intents[i] + logging.info(f'Predicted intent:\t{pred}\t{intent_dict[pred]}') + if intents is not None: + logging.info(f'True intent:\t{intents[i]}\t{intent_dict[intents[i]]}') + + pred_slot = pred_slots[i][slot_masks[i]] + tokens = query.strip().split() + + if len(pred_slot) != len(tokens): + raise ValueError('Pred_slot and tokens must be of the same length') + + for j, token in enumerate(tokens): + output = f'{token}\t{slot_dict[pred_slot[j]]}' + if slots is not None: + output = f'{output}\t{slot_dict[slots[i][j]]}' + logging.info(output) diff --git a/nemo/collections/nlp/data/datasets/joint_intent_slot_dataset.py b/nemo/collections/nlp/data/datasets/joint_intent_slot_dataset/joint_intent_slot_dataset.py similarity index 51% rename from nemo/collections/nlp/data/datasets/joint_intent_slot_dataset.py rename to nemo/collections/nlp/data/datasets/joint_intent_slot_dataset/joint_intent_slot_dataset.py index 4abc70923226..e6795c14b289 100644 --- a/nemo/collections/nlp/data/datasets/joint_intent_slot_dataset.py +++ b/nemo/collections/nlp/data/datasets/joint_intent_slot_dataset/joint_intent_slot_dataset.py @@ -1,6 +1,7 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. # Copyright 2018 The Google AI Language Team Authors and # The HuggingFace Inc. team. -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,31 +14,21 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+# ============================================================================= + """ Utility functions for Token Classification NLP tasks Some parts of this code were adapted from the HuggingFace library at https://github.com/huggingface/pytorch-pretrained-BERT """ -import itertools -import random import numpy as np from torch.utils.data import Dataset from nemo import logging -from nemo.collections.nlp.data.datasets.datasets_utils import ( - get_label_stats, - get_stats, - merge, - process_atis, - process_dialogflow, - process_jarvis_datasets, - process_mturk, - process_snips, -) -from nemo.collections.nlp.utils.common_nlp_utils import calc_class_weights, get_vocab, if_exist, label2idx +from nemo.collections.nlp.data.datasets.datasets_utils import get_stats -__all__ = ['BertJointIntentSlotDataset', 'BertJointIntentSlotInferDataset', 'JointIntentSlotDataDesc'] +__all__ = ['BertJointIntentSlotDataset', 'BertJointIntentSlotInferDataset'] def get_features( @@ -64,14 +55,14 @@ def get_features( for i, query in enumerate(queries): words = query.strip().split() - subtokens = ['[CLS]'] + subtokens = [tokenizer.cls_token] loss_mask = [1 - ignore_start_end] subtokens_mask = [0] if with_label: slots = [pad_label] for j, word in enumerate(words): - word_tokens = tokenizer.tokenize(word) + word_tokens = tokenizer.text_to_tokens(word) subtokens.extend(word_tokens) loss_mask.append(1) @@ -83,8 +74,8 @@ def get_features( if with_label: slots.extend([raw_slots[i][j]] * len(word_tokens)) - subtokens.append('[SEP]') - loss_mask.append(not ignore_start_end) + subtokens.append(tokenizer.sep_token) + loss_mask.append(1 - ignore_start_end) subtokens_mask.append(0) sent_lengths.append(len(subtokens)) all_subtokens.append(subtokens) @@ -102,7 +93,7 @@ def get_features( for i, subtokens in enumerate(all_subtokens): if len(subtokens) > max_seq_length: - subtokens = ['[CLS]'] + subtokens[-max_seq_length + 1 :] + subtokens = [tokenizer.cls_token] + subtokens[-max_seq_length + 1 :] all_input_mask[i] = [1] + all_input_mask[i][-max_seq_length + 1 :] all_loss_mask[i] = [1 - ignore_start_end] + all_loss_mask[i][-max_seq_length + 1 :] all_subtokens_mask[i] = [0] + all_subtokens_mask[i][-max_seq_length + 1 :] @@ -111,7 +102,7 @@ def get_features( all_slots[i] = [pad_label] + all_slots[i][-max_seq_length + 1 :] too_long_count += 1 - all_input_ids.append([tokenizer._convert_token_to_id(t) for t in subtokens]) + all_input_ids.append([tokenizer.tokens_to_ids(t) for t in subtokens]) if len(subtokens) < max_seq_length: extra = max_seq_length - len(subtokens) @@ -127,6 +118,16 @@ def get_features( logging.info(f'{too_long_count} are longer than {max_seq_length}') + logging.info("*** Some Examples of Processed Data***") + for i in range(min(len(all_input_ids), 5)): + logging.info("i: %s" % (i)) + logging.info("subtokens: %s" % " ".join(list(map(str, all_subtokens[i])))) + logging.info("loss_mask: %s" % " ".join(list(map(str, all_loss_mask[i])))) + logging.info("input_mask: %s" % " ".join(list(map(str, all_input_mask[i])))) + logging.info("subtokens_mask: %s" % " ".join(list(map(str, all_subtokens_mask[i])))) + if with_label: + logging.info("slots_label: %s" % " ".join(list(map(str, all_slots[i])))) + return (all_input_ids, all_segment_ids, all_input_mask, all_loss_mask, all_subtokens_mask, all_slots) @@ -148,10 +149,9 @@ class BertJointIntentSlotDataset(Dataset): slot_file (str): file to slot labels, each line corresponding to slot labels for a sentence in input_file. No header. 
max_seq_length (int): max sequence length minus 2 for [CLS] and [SEP] - tokenizer (Tokenizer): such as BertTokenizer + tokenizer (Tokenizer): such as NemoBertTokenizer num_samples (int): number of samples you want to use for the dataset. If -1, use all dataset. Useful for testing. - shuffle (bool): whether to shuffle your data. pad_label (int): pad value use for slot labels. by default, it's the neutral label. @@ -164,10 +164,10 @@ def __init__( max_seq_length, tokenizer, num_samples=-1, - shuffle=True, pad_label=128, ignore_extra_tokens=False, ignore_start_end=False, + do_lower_case=False, ): if num_samples == 0: raise ValueError("num_samples has to be positive", num_samples) @@ -182,8 +182,6 @@ def __init__( dataset = list(zip(slot_lines, input_lines)) - if shuffle or num_samples > 0: - random.shuffle(dataset) if num_samples > 0: dataset = dataset[:num_samples] @@ -192,7 +190,10 @@ def __init__( raw_slots.append([int(slot) for slot in slot_line.strip().split()]) parts = input_line.strip().split() raw_intents.append(int(parts[-1])) - queries.append(' '.join(parts[:-1])) + query = ' '.join(parts[:-1]) + if do_lower_case: + query = query.lower() + queries.append(query) features = get_features( queries, @@ -241,13 +242,17 @@ class BertJointIntentSlotInferDataset(Dataset): Args: queries (list): list of queries to run inference on max_seq_length (int): max sequence length minus 2 for [CLS] and [SEP] - tokenizer (Tokenizer): such as BertTokenizer + tokenizer (Tokenizer): such as NemoBertTokenizer pad_label (int): pad value use for slot labels. by default, it's the neutral label. """ - def __init__(self, queries, max_seq_length, tokenizer): + def __init__(self, queries, max_seq_length, tokenizer, do_lower_case): + if do_lower_case: + for idx, query in enumerate(queries): + queries[idx] = queries[idx].lower() + features = get_features(queries, max_seq_length, tokenizer) self.all_input_ids = features[0] @@ -267,139 +272,3 @@ def __getitem__(self, idx): np.array(self.all_loss_mask[idx]), np.array(self.all_subtokens_mask[idx]), ) - - -class JointIntentSlotDataDesc: - """ Convert the raw data to the standard format supported by - JointIntentSlotDataset. - - By default, the None label for slots is 'O'. - - JointIntentSlotDataset requires two files: - - input_file: file to sequence + label. - the first line is header (sentence [tab] label) - each line should be [sentence][tab][label] - - slot_file: file to slot labels, each line corresponding to - slot labels for a sentence in input_file. No header. - - To keep the mapping from label index to label consistent during - training and inferencing, we require the following files: - dicts.intents.csv: each line is an intent. The first line - corresponding to the 0 intent label, the second line - corresponding to the 1 intent label, and so on. - - dicts.slots.csv: each line is a slot. The first line - corresponding to the 0 slot label, the second line - corresponding to the 1 slot label, and so on. - - Args: - data_dir (str): the directory of the dataset - do_lower_case (bool): whether to set your dataset to lowercase - dataset_name (str): the name of the dataset. If it's a dataset - that follows the standard JointIntentSlotDataset format, - you can set the name as 'default'. - none_slot_label (str): the label for slots that aren't indentified - defaulted to 'O' - pad_label (int): the int used for padding. If set to -1, - it'll be set to the whatever the None label is. 
- - """ - - def __init__(self, data_dir, do_lower_case=False, dataset_name='default', none_slot_label='O', pad_label=-1): - if dataset_name == 'atis': - self.data_dir = process_atis(data_dir, do_lower_case) - elif dataset_name == 'snips-atis': - self.data_dir, self.pad_label = merge( - data_dir, ['ATIS/nemo-processed-uncased', 'snips/nemo-processed-uncased/all'], dataset_name - ) - elif dataset_name == 'dialogflow': - self.data_dir = process_dialogflow(data_dir, do_lower_case) - elif dataset_name == 'mturk-processed': - self.data_dir = process_mturk(data_dir, do_lower_case) - elif dataset_name in set(['snips-light', 'snips-speak', 'snips-all']): - self.data_dir = process_snips(data_dir, do_lower_case) - if dataset_name.endswith('light'): - self.data_dir = f'{self.data_dir}/light' - elif dataset_name.endswith('speak'): - self.data_dir = f'{self.data_dir}/speak' - elif dataset_name.endswith('all'): - self.data_dir = f'{self.data_dir}/all' - elif dataset_name.startswith('jarvis'): - self.data_dir = process_jarvis_datasets( - data_dir, do_lower_case, dataset_name, modes=["train", "test", "eval"], ignore_prev_intent=False - ) - else: - if not if_exist(data_dir, ['dict.intents.csv', 'dict.slots.csv']): - raise FileNotFoundError( - "Make sure that your data follows the standard format " - "supported by JointIntentSlotDataset. Your data must " - "contain dict.intents.csv and dict.slots.csv." - ) - self.data_dir = data_dir - - self.intent_dict_file = self.data_dir + '/dict.intents.csv' - self.slot_dict_file = self.data_dir + '/dict.slots.csv' - self.num_intents = len(get_vocab(self.intent_dict_file)) - slots = label2idx(self.slot_dict_file) - self.num_slots = len(slots) - - for mode in ['train', 'test', 'eval']: - - if not if_exist(self.data_dir, [f'{mode}.tsv']): - logging.info(f' Stats calculation for {mode} mode' f' is skipped as {mode}.tsv was not found.') - continue - - slot_file = f'{self.data_dir}/{mode}_slots.tsv' - with open(slot_file, 'r') as f: - slot_lines = f.readlines() - - input_file = f'{self.data_dir}/{mode}.tsv' - with open(input_file, 'r') as f: - input_lines = f.readlines()[1:] # Skipping headers at index 0 - - if len(slot_lines) != len(input_lines): - raise ValueError( - "Make sure that the number of slot lines match the " - "number of intent lines. There should be a 1-1 " - "correspondence between every slot and intent lines." 
- ) - - dataset = list(zip(slot_lines, input_lines)) - - raw_slots, queries, raw_intents = [], [], [] - for slot_line, input_line in dataset: - slot_list = [int(slot) for slot in slot_line.strip().split()] - raw_slots.append(slot_list) - parts = input_line.strip().split() - raw_intents.append(int(parts[-1])) - queries.append(' '.join(parts[:-1])) - - infold = input_file[: input_file.rfind('/')] - - logging.info(f'Three most popular intents during {mode}ing') - total_intents, intent_label_freq = get_label_stats(raw_intents, infold + f'/{mode}_intent_stats.tsv') - merged_slots = itertools.chain.from_iterable(raw_slots) - - logging.info(f'Three most popular slots during {mode}ing') - slots_total, slots_label_freq = get_label_stats(merged_slots, infold + f'/{mode}_slot_stats.tsv') - - if mode == 'train': - self.slot_weights = calc_class_weights(slots_label_freq) - logging.info(f'Slot weights are - {self.slot_weights}') - - self.intent_weights = calc_class_weights(intent_label_freq) - logging.info(f'Intent weights are - {self.intent_weights}') - - logging.info(f'Total intents - {total_intents}') - logging.info(f'Intent label frequency - {intent_label_freq}') - logging.info(f'Total Slots - {slots_total}') - logging.info(f'Slots label frequency - {slots_label_freq}') - - if pad_label != -1: - self.pad_label = pad_label - else: - if none_slot_label not in slots: - raise ValueError(f'none_slot_label {none_slot_label} not ' f'found in {self.slot_dict_file}.') - self.pad_label = slots[none_slot_label] diff --git a/nemo/collections/nlp/data/datasets/joint_intent_slot_dataset/joint_intent_slot_descriptor.py b/nemo/collections/nlp/data/datasets/joint_intent_slot_dataset/joint_intent_slot_descriptor.py new file mode 100644 index 000000000000..bfcdf4e761cd --- /dev/null +++ b/nemo/collections/nlp/data/datasets/joint_intent_slot_dataset/joint_intent_slot_descriptor.py @@ -0,0 +1,145 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +import itertools + +from nemo import logging +from nemo.collections.nlp.data.datasets.datasets_utils import ( + fill_class_weights, + get_freq_weights, + get_label_stats, + if_exist, +) + +__all__ = ['JointIntentSlotDataDesc'] + + +class JointIntentSlotDataDesc: + """ Convert the raw data to the standard format supported by + JointIntentSlotDataset. + + By default, the None label for slots is 'O'. + + JointIntentSlotDataset requires two files: + + input_file: file to sequence + label. + the first line is header (sentence [tab] label) + each line should be [sentence][tab][label] + + slot_file: file to slot labels, each line corresponding to + slot labels for a sentence in input_file. No header. + + To keep the mapping from label index to label consistent during + training and inferencing, we require the following files: + dicts.intents.csv: each line is an intent. 
The first line + corresponding to the 0 intent label, the second line + corresponding to the 1 intent label, and so on. + + dicts.slots.csv: each line is a slot. The first line + corresponding to the 0 slot label, the second line + corresponding to the 1 slot label, and so on. + + Args: + data_dir (str): the directory of the dataset + none_slot_label (str): the label for slots that aren't identified + defaulted to 'O' + pad_label (int): the int used for padding. If set to -1, + it'll be set to the whatever the None label is. + """ + + def __init__(self, data_dir, none_slot_label='O', pad_label=-1): + if not if_exist(data_dir, ['dict.intents.csv', 'dict.slots.csv']): + raise FileNotFoundError( + "Make sure that your data follows the standard format " + "supported by JointIntentSlotDataset. Your data must " + "contain dict.intents.csv and dict.slots.csv." + ) + + self.data_dir = data_dir + self.intent_dict_file = self.data_dir + '/dict.intents.csv' + self.slot_dict_file = self.data_dir + '/dict.slots.csv' + + self.intents_label_ids = JointIntentSlotDataDesc.label2idx(self.intent_dict_file) + self.num_intents = len(self.intents_label_ids) + self.slots_label_ids = JointIntentSlotDataDesc.label2idx(self.slot_dict_file) + self.num_slots = len(self.slots_label_ids) + + infold = self.data_dir + for mode in ['train', 'test', 'dev']: + if not if_exist(self.data_dir, [f'{mode}.tsv']): + logging.info(f' Stats calculation for {mode} mode' f' is skipped as {mode}.tsv was not found.') + continue + logging.info(f' Stats calculating for {mode} mode...') + slot_file = f'{self.data_dir}/{mode}_slots.tsv' + with open(slot_file, 'r') as f: + slot_lines = f.readlines() + + input_file = f'{self.data_dir}/{mode}.tsv' + with open(input_file, 'r') as f: + input_lines = f.readlines()[1:] # Skipping headers at index 0 + + if len(slot_lines) != len(input_lines): + raise ValueError( + "Make sure that the number of slot lines match the " + "number of intent lines. There should be a 1-1 " + "correspondence between every slot and intent lines." 
+ ) + + dataset = list(zip(slot_lines, input_lines)) + + raw_slots, raw_intents = [], [] + for slot_line, input_line in dataset: + slot_list = [int(slot) for slot in slot_line.strip().split()] + raw_slots.append(slot_list) + parts = input_line.strip().split() + raw_intents.append(int(parts[-1])) + + logging.info(f'Three most popular intents in {mode} mode:') + total_intents, intent_label_freq, max_id = get_label_stats( + raw_intents, infold + f'/{mode}_intent_stats.tsv' + ) + + merged_slots = itertools.chain.from_iterable(raw_slots) + logging.info(f'Three most popular slots in {mode} mode:') + slots_total, slots_label_freq, max_id = get_label_stats(merged_slots, infold + f'/{mode}_slot_stats.tsv') + + logging.info(f'Total Number of Intents: {total_intents}') + logging.info(f'Intent Label Frequencies: {intent_label_freq}') + logging.info(f'Total Number of Slots: {slots_total}') + logging.info(f'Slots Label Frequencies: {slots_label_freq}') + + if mode == 'train': + intent_weights_dict = get_freq_weights(intent_label_freq) + logging.info(f'Intent Weights: {intent_weights_dict}') + slot_weights_dict = get_freq_weights(slots_label_freq) + logging.info(f'Slot Weights: {slot_weights_dict}') + + self.intent_weights = fill_class_weights(intent_weights_dict, self.num_intents - 1) + self.slot_weights = fill_class_weights(slot_weights_dict, self.num_slots - 1) + + if pad_label != -1: + self.pad_label = pad_label + else: + if none_slot_label not in self.slots_label_ids: + raise ValueError(f'none_slot_label {none_slot_label} not ' f'found in {self.slot_dict_file}.') + self.pad_label = self.slots_label_ids[none_slot_label] + + @staticmethod + def label2idx(file): + lines = open(file, 'r').readlines() + lines = [line.strip() for line in lines if line.strip()] + labels = {lines[i]: i for i in range(len(lines))} + return labels diff --git a/nemo/collections/nlp/data/datasets/lm_bert_dataset.py b/nemo/collections/nlp/data/datasets/lm_bert_dataset.py index 61b74f933c60..05c06f3e7dc8 100644 --- a/nemo/collections/nlp/data/datasets/lm_bert_dataset.py +++ b/nemo/collections/nlp/data/datasets/lm_bert_dataset.py @@ -24,12 +24,12 @@ import h5py import numpy as np +from sentencepiece import SentencePieceTrainer as SPT from torch.utils.data import Dataset from tqdm import tqdm from nemo import logging -from nemo.collections.nlp.data.datasets.datasets_utils import download_wkt2 -from nemo.collections.nlp.data.datasets.lm_transformer_dataset import create_vocab_mlm +from nemo.collections.nlp.data.datasets.datasets_utils.data_preprocessing import DATABASE_EXISTS_TMP, if_exist __all__ = ['BertPretrainingDataset', 'BertPretrainingPreprocessedDataset'] @@ -377,20 +377,77 @@ def __getitem__(self, index): class BERTPretrainingDataDesc: - def __init__(self, dataset_name, data_dir, vocab_size, sample_size, special_tokens, train_file=''): + def __init__( + self, dataset_name, vocab_size, sample_size, special_tokens, train_data, eval_data=None, test_data=None, + ): if dataset_name == 'wikitext-2': - if not os.path.exists(data_dir): - data_dir = download_wkt2(data_dir) - self.data_dir, self.tokenizer_model = create_vocab_mlm( - data_dir, vocab_size, sample_size, special_tokens, train_file + if not os.path.exists(train_data): + raise FileNotFoundError( + "Dataset not found. 
Run 'get_wkt2.sh DATA_DIR' from examples/nlp/language_modeling" + ) + self.data_dir, self.tokenizer_model, self.vocab_file = self.create_vocab_mlm( + train_data, vocab_size, sample_size, special_tokens ) else: - logging.warning( - "Looks like you passed a dataset name that isn't " - "already supported by NeMo. Please make sure that " + raise ValueError( + "Looks like you passed a dataset name that isn't already supported by NeMo. Please make sure that " "you build the preprocessing method for it." ) - self.train_file = f'{data_dir}/train.txt' - self.eval_file = f'{data_dir}/valid.txt' - self.test_file = f'{data_dir}/test.txt' + self.train_file = train_data + self.eval_file = eval_data + self.test_file = test_data + + def create_vocab_mlm( + self, data_file, vocab_size, sample_size, special_tokens=['[PAD]', '[UNK]', '[CLS]', '[SEP]', '[MASK]'], + ): + data_dir = os.path.dirname(data_file) + vocab = special_tokens[:] + bert_dir = f'{data_dir}/bert' + if if_exist(bert_dir, ['tokenizer.model']): + logging.info(DATABASE_EXISTS_TMP.format('WikiText_BERT', bert_dir)) + return data_dir, f'{bert_dir}/tokenizer.model', f'{bert_dir}/vocab.txt' + logging.info(f'Processing WikiText dataset and store at {bert_dir}') + os.makedirs(bert_dir, exist_ok=True) + + if not data_file: + files = glob.glob(f'{data_dir}/*.txt') + data_file = f'{bert_dir}/merged.txt' + logging.info(f"Merging {len(files)} txt files into {data_file}") + + with open(data_file, "w") as merged: + for file in tqdm(files): + with open(file, 'r') as inf: + content = inf.read().strip() + merged.write(content + '\n\n\n') + + cmd = ( + f"--input={data_file} --model_prefix={bert_dir}/tokenizer " + f"--vocab_size={vocab_size - len(vocab)} " + f"--input_sentence_size={sample_size} " + f"--shuffle_input_sentence=true --hard_vocab_limit=false " + f"--bos_id=-1 --eos_id=-1" + ) + + SPT.Train(cmd) + + # Add BERT control symbols + tokens = [] + + with open(f"{bert_dir}/tokenizer.vocab", "r") as f: + f.readline() # skip first token + + # Read tokens from each line and parse for vocab + for line in f: + piece = line.split("\t")[0] + token = piece[1:] if piece.startswith("▁") else f"##{piece}" + tokens.append(token) + + vocab.extend(tokens) + + # Save vocabulary to output file + vocab_file = f'{bert_dir}/vocab.txt' + with open(vocab_file, "w") as f: + for token in vocab: + f.write(f"{token}\n".format()) + return data_dir, f'{bert_dir}/tokenizer.model', vocab_file diff --git a/nemo/collections/nlp/data/datasets/lm_transformer_dataset.py b/nemo/collections/nlp/data/datasets/lm_transformer_dataset.py index 5d8f20723c6e..1659f0d8d072 100644 --- a/nemo/collections/nlp/data/datasets/lm_transformer_dataset.py +++ b/nemo/collections/nlp/data/datasets/lm_transformer_dataset.py @@ -15,21 +15,16 @@ # ============================================================================= """Pytorch Dataset for training Neural Machine Translation.""" -import glob import os -import pickle import re import numpy as np -from sentencepiece import SentencePieceTrainer as SPT from torch.utils.data import Dataset -from tqdm import tqdm from nemo import logging -from nemo.collections.nlp.data.datasets.datasets_utils import DATABASE_EXISTS_TMP, download_wkt2 -from nemo.collections.nlp.utils.common_nlp_utils import if_exist +from nemo.collections.nlp.data.datasets.datasets_utils import dataset_to_ids, if_exist -__all__ = ['LanguageModelingDataset'] +__all__ = ['LanguageModelingDataset', 'LanguageModelDataDesc'] class LanguageModelingDataset(Dataset): @@ -56,8 +51,10 @@ class 
LanguageModelDataDesc: def __init__(self, dataset_name, data_dir, do_lower_case): if dataset_name == 'wikitext-2': if not os.path.exists(data_dir): - data_dir = download_wkt2(data_dir) - self.vocab_size = create_vocab_lm(data_dir, do_lower_case) + raise FileNotFoundError( + "Dataset not found. Run 'get_wkt2.sh DATA_DIR' from examples/nlp/language_modeling" + ) + self.vocab_size = self.create_vocab_lm(data_dir, do_lower_case) self.data_dir = data_dir else: logging.warning( @@ -66,122 +63,33 @@ def __init__(self, dataset_name, data_dir, do_lower_case): "you build the preprocessing method for it." ) - -def create_vocab_mlm( - data_dir, vocab_size, sample_size, special_tokens=['[PAD]', '[UNK]', '[CLS]', '[SEP]', '[MASK]'], train_file='' -): - vocab = special_tokens[:] - bert_dir = f'{data_dir}/bert' - if if_exist(bert_dir, ['tokenizer.model']): - logging.info(DATABASE_EXISTS_TMP.format('WikiText_BERT', bert_dir)) - return data_dir, f'{bert_dir}/tokenizer.model' - logging.info(f'Processing WikiText dataset and store at {bert_dir}') - os.makedirs(bert_dir, exist_ok=True) - - if not train_file: - files = glob.glob(f'{data_dir}/*.txt') - train_file = f'{bert_dir}/merged.txt' - logging.info(f"Merging {len(files)} txt files into {train_file}") - - with open(train_file, "w") as merged: - for file in tqdm(files): - with open(file, 'r') as inf: - content = inf.read().strip() - merged.write(content + '\n\n\n') - else: - train_file = f'{data_dir}/{train_file}' - - cmd = ( - f"--input={train_file} --model_prefix={bert_dir}/tokenizer " - f"--vocab_size={vocab_size - len(vocab)} " - f"--input_sentence_size={sample_size} " - f"--shuffle_input_sentence=true --hard_vocab_limit=false " - f"--bos_id=-1 --eos_id=-1" - ) - SPT.Train(cmd) - - # Add BERT control symbols - tokens = [] - - with open(f"{bert_dir}/tokenizer.vocab", "r") as f: - f.readline() # skip first token - - # Read tokens from each line and parse for vocab - for line in f: - piece = line.split("\t")[0] - token = piece[1:] if piece.startswith("▁") else f"##{piece}" - tokens.append(token) - - vocab.extend(tokens) - - # Save vocabulary to output file - with open(f'{bert_dir}/vocab.txt', "w") as f: - for token in vocab: - f.write(f"{token}\n".format()) - return data_dir, f'{bert_dir}/tokenizer.model' - - -def dataset_to_ids(dataset, tokenizer, cache_ids=False, add_bos_eos=True): - """ - Reads dataset from file line by line, tokenizes each line with tokenizer, - and returns list of lists which corresponds to ids of tokenized strings. 
- - Args: - dataset: path to dataset - tokenizer: tokenizer to convert text into ids - cache_ids: if True, ids are saved to disk as pickle file - with similar name (e.g., data.txt --> data.txt.pkl) - add_bos_eos: bool, whether to add and symbols (e.g., for NMT) - Returns: - ids: list of ids which correspond to tokenized strings of the dataset - """ - - cached_ids_dataset = dataset + str(".pkl") - if os.path.isfile(cached_ids_dataset): - logging.info("Loading cached tokenized dataset ...") - ids = pickle.load(open(cached_ids_dataset, "rb")) - else: - logging.info("Tokenizing dataset ...") - data = open(dataset, "rb").readlines() - ids = [] - for sentence in data: - sent_ids = tokenizer.text_to_ids(sentence.decode("utf-8")) - if add_bos_eos: - sent_ids = [tokenizer.bos_id] + sent_ids + [tokenizer.eos_id] - ids.append(sent_ids) - if cache_ids: - logging.info("Caching tokenized dataset ...") - pickle.dump(ids, open(cached_ids_dataset, "wb")) - return ids - - -def create_vocab_lm(data_dir, do_lower_case): - if if_exist(data_dir, ['train.txt', 'vocab.txt']): - logging.info("Vocabulary has been created.") - with open(os.path.join(data_dir, 'vocab.txt'), 'r') as f: - vocab_size = len(f.readlines()) - return vocab_size - - logging.info(f'Creating vocabulary from training data at {data_dir}') - - with open(f'{data_dir}/train.txt', 'r') as f: - txt = f.read() - if do_lower_case: - txt = txt.lower() - lines = re.split(r'[\n]', txt) - sentences = [line.strip().split() for line in lines if line.strip()] - - vocab = {"[PAD]": 0, "[SEP]": 1, "[CLS]": 2, "[MASK]": 3} - idx = 4 - for sentence in sentences: - for word in sentence: - if word not in vocab: - vocab[word] = idx - idx += 1 - - with open(f'{data_dir}/vocab.txt', 'w') as f: - for word in sorted(vocab.keys()): - f.write(word + '\n') - logging.info(f"Created vocabulary of size {len(vocab)}") - - return len(vocab) + def create_vocab_lm(self, data_dir, do_lower_case): + if if_exist(data_dir, ['train.txt', 'vocab.txt']): + logging.info("Vocabulary has been created.") + with open(os.path.join(data_dir, 'vocab.txt'), 'r') as f: + vocab_size = len(f.readlines()) + return vocab_size + + logging.info(f'Creating vocabulary from training data at {data_dir}') + + with open(f'{data_dir}/train.txt', 'r') as f: + txt = f.read() + if do_lower_case: + txt = txt.lower() + lines = re.split(r'[\n]', txt) + sentences = [line.strip().split() for line in lines if line.strip()] + + vocab = {"[PAD]": 0, "[SEP]": 1, "[CLS]": 2, "[MASK]": 3} + idx = 4 + for sentence in sentences: + for word in sentence: + if word not in vocab: + vocab[word] = idx + idx += 1 + + with open(f'{data_dir}/vocab.txt', 'w') as f: + for word in sorted(vocab.keys()): + f.write(word + '\n') + logging.info(f"Created vocabulary of size {len(vocab)}") + + return len(vocab) diff --git a/nemo/collections/nlp/data/datasets/machine_translation_dataset.py b/nemo/collections/nlp/data/datasets/machine_translation_dataset.py index db8e6b7ace2d..3fe43c1f6820 100644 --- a/nemo/collections/nlp/data/datasets/machine_translation_dataset.py +++ b/nemo/collections/nlp/data/datasets/machine_translation_dataset.py @@ -21,7 +21,7 @@ import numpy as np from torch.utils.data import Dataset -from nemo.collections.nlp.data.datasets.lm_transformer_dataset import dataset_to_ids +from nemo.collections.nlp.data.datasets.datasets_utils.data_preprocessing import dataset_to_ids __all__ = ['TranslationDataset'] @@ -36,7 +36,7 @@ def __init__(self, tokenizer_src, tokenizer_tgt, dataset_src, dataset_tgt, token src_ids = 
dataset_to_ids(dataset_src, tokenizer_src) tgt_ids = dataset_to_ids(dataset_tgt, tokenizer_tgt) if clean: - src_ids, tgt_ids = clean_src_and_target(src_ids, tgt_ids) + src_ids, tgt_ids = self.clean_src_and_target(src_ids, tgt_ids) self.batch_indices = self.pack_data_into_batches(src_ids, tgt_ids) self.batches = self.pad_batches(src_ids, tgt_ids, self.batch_indices) @@ -156,35 +156,36 @@ def pack_data_into_batches(self, src_ids, tgt_ids): return batches + def clean_src_and_target( + self, src_ids, tgt_ids, max_tokens=128, min_tokens=3, max_tokens_diff=25, max_tokens_ratio=2.5 + ): + """ + Cleans source and target sentences to get rid of noisy data. + Specifically, a pair of sentences is removed if + -- either source or target is longer than *max_tokens* + -- either source or target is shorter than *min_tokens* + -- absolute difference between source and target is larger than + *max_tokens_diff* + -- one sentence is *max_tokens_ratio* times longer than the other + """ -def clean_src_and_target(src_ids, tgt_ids, max_tokens=128, min_tokens=3, max_tokens_diff=25, max_tokens_ratio=2.5): - """ - Cleans source and target sentences to get rid of noisy data. - Specifically, a pair of sentences is removed if - -- either source or target is longer than *max_tokens* - -- either source or target is shorter than *min_tokens* - -- absolute difference between source and target is larger than - *max_tokens_diff* - -- one sentence is *max_tokens_ratio* times longer than the other - """ - - if len(src_ids) != len(tgt_ids): - raise ValueError("Source and target corpora have different lengths!") - src_ids_, tgt_ids_ = [], [] - for i in range(len(src_ids)): - src_len, tgt_len = len(src_ids[i]), len(tgt_ids[i]) - if ( - src_len > max_tokens - or tgt_len > max_tokens - or src_len < min_tokens - or tgt_len < min_tokens - or (src_ids[i] == tgt_ids[i]) - or np.abs(src_len - tgt_len) > max_tokens_diff - ): - continue - ratio = max(src_len - 2, 1) / max(tgt_len - 2, 1) - if ratio > max_tokens_ratio or ratio < (1 / max_tokens_ratio): - continue - src_ids_.append(src_ids[i]) - tgt_ids_.append(tgt_ids[i]) - return src_ids_, tgt_ids_ + if len(src_ids) != len(tgt_ids): + raise ValueError("Source and target corpora have different lengths!") + src_ids_, tgt_ids_ = [], [] + for i in range(len(src_ids)): + src_len, tgt_len = len(src_ids[i]), len(tgt_ids[i]) + if ( + src_len > max_tokens + or tgt_len > max_tokens + or src_len < min_tokens + or tgt_len < min_tokens + or (src_ids[i] == tgt_ids[i]) + or np.abs(src_len - tgt_len) > max_tokens_diff + ): + continue + ratio = max(src_len - 2, 1) / max(tgt_len - 2, 1) + if ratio > max_tokens_ratio or ratio < (1 / max_tokens_ratio): + continue + src_ids_.append(src_ids[i]) + tgt_ids_.append(tgt_ids[i]) + return src_ids_, tgt_ids_ diff --git a/nemo/collections/nlp/data/datasets/state_tracking_trade_dataset.py b/nemo/collections/nlp/data/datasets/multiwoz_dataset.py similarity index 99% rename from nemo/collections/nlp/data/datasets/state_tracking_trade_dataset.py rename to nemo/collections/nlp/data/datasets/multiwoz_dataset.py index 9358c79d16d6..17690034fc93 100644 --- a/nemo/collections/nlp/data/datasets/state_tracking_trade_dataset.py +++ b/nemo/collections/nlp/data/datasets/multiwoz_dataset.py @@ -170,7 +170,7 @@ def __getitem__(self, idx): class Vocab: """ - Vocab class for TRADE model + Vocab class for MultiWOZ dataset UNK_token = 0 PAD_token = 1 SOS_token = 3 diff --git a/nemo/collections/nlp/data/datasets/punctuation_capitalization_dataset.py 
b/nemo/collections/nlp/data/datasets/punctuation_capitalization_dataset.py index b2df10907304..84e5e09315f7 100644 --- a/nemo/collections/nlp/data/datasets/punctuation_capitalization_dataset.py +++ b/nemo/collections/nlp/data/datasets/punctuation_capitalization_dataset.py @@ -1,6 +1,5 @@ -# Copyright 2018 The Google AI Language Team Authors and -# The HuggingFace Inc. team. -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,24 +12,19 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" -Utility functions for Token Classification NLP tasks -Some parts of this code were adapted from the HuggingFace library at -https://github.com/huggingface/pytorch-pretrained-BERT -""" +# ============================================================================= __all__ = ['BertPunctuationCapitalizationDataset', 'BertPunctuationCapitalizationInferDataset'] import itertools import os import pickle -import random import numpy as np from torch.utils.data import Dataset from nemo import logging -from nemo.collections.nlp.data.datasets import datasets_utils as utils +from nemo.collections.nlp.data.datasets.datasets_utils import get_label_stats, get_stats def get_features( @@ -49,7 +43,7 @@ def get_features( Args: queries (list of str): text sequences max_seq_length (int): max sequence length minus 2 for [CLS] and [SEP] - tokenizer (Tokenizer): such as NemoBertTokenizer + tokenizer (TokenizerSpec): such as NemoBertTokenizer pad_label (str): pad value use for labels. by default, it's the neutral label. punct_label_ids (dict): dict to map punctuation labels to label ids. 
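# --- Editor's note: illustrative sketch, not part of the patch ----------------
# The hunks that follow stop hard-coding '[CLS]'/'[SEP]' and instead read the
# special tokens from the tokenizer object, so get_features() also works with
# tokenizers whose special tokens differ from BERT's. A minimal, hypothetical
# stand-in exposing the attributes and methods the updated code relies on
# (cls_token, sep_token, text_to_tokens, list-based tokens_to_ids) could be:
class ToySpecialTokenTokenizer:
    cls_token = '[CLS]'   # a RoBERTa-style tokenizer would report '<s>' here
    sep_token = '[SEP]'   # ... and '</s>' here

    def text_to_tokens(self, text):
        return text.lower().split()

    def tokens_to_ids(self, tokens):
        # toy incremental vocabulary, for illustration only
        vocab = {}
        return [vocab.setdefault(t, len(vocab)) for t in tokens]

# The pattern the updated get_features() follows is then:
#   subtokens = [tokenizer.cls_token] + word_pieces + [tokenizer.sep_token]
#   input_ids = tokenizer.tokens_to_ids(subtokens)
# -------------------------------------------------------------------------------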
@@ -83,7 +77,7 @@ def get_features( words = query.strip().split() # add bos token - subtokens = ['[CLS]'] + subtokens = [tokenizer.cls_token] loss_mask = [1 - ignore_start_end] subtokens_mask = [0] if with_label: @@ -109,7 +103,7 @@ def get_features( capit_labels.extend([capit_query_labels[j]] * len(word_tokens)) # add eos token - subtokens.append('[SEP]') + subtokens.append(tokenizer.sep_token) loss_mask.append(1 - ignore_start_end) subtokens_mask.append(0) sent_lengths.append(len(subtokens)) @@ -126,12 +120,12 @@ def get_features( max_seq_length = min(max_seq_length, max(sent_lengths)) logging.info(f'Max length: {max_seq_length}') - utils.get_stats(sent_lengths) + get_stats(sent_lengths) too_long_count = 0 for i, subtokens in enumerate(all_subtokens): if len(subtokens) > max_seq_length: - subtokens = ['[CLS]'] + subtokens[-max_seq_length + 1 :] + subtokens = [tokenizer.cls_token] + subtokens[-max_seq_length + 1 :] all_input_mask[i] = [1] + all_input_mask[i][-max_seq_length + 1 :] all_loss_mask[i] = [int(not ignore_start_end)] + all_loss_mask[i][-max_seq_length + 1 :] all_subtokens_mask[i] = [0] + all_subtokens_mask[i][-max_seq_length + 1 :] @@ -141,7 +135,7 @@ def get_features( capit_all_labels[i] = [pad_id] + capit_all_labels[i][-max_seq_length + 1 :] too_long_count += 1 - all_input_ids.append([tokenizer.tokens_to_ids(t) for t in subtokens]) + all_input_ids.append(tokenizer.tokens_to_ids(subtokens)) if len(subtokens) < max_seq_length: extra = max_seq_length - len(subtokens) @@ -202,7 +196,6 @@ class BertPunctuationCapitalizationDataset(Dataset): tokenizer (Tokenizer): such as NemoBertTokenizer num_samples (int): number of samples you want to use for the dataset. If -1, use all dataset. Useful for testing. - shuffle (bool): whether to shuffle your data. pad_label (str): pad value use for labels. by default, it's the neutral label. 
punct_label_ids and capit_label_ids (dict): @@ -224,7 +217,6 @@ def __init__( max_seq_length, tokenizer, num_samples=-1, - shuffle=False, pad_label='O', punct_label_ids=None, capit_label_ids=None, @@ -242,7 +234,11 @@ def __init__( raise ValueError("{text_file} should have extension .txt") filename = filename[:-4] - features_pkl = os.path.join(data_dir, filename + "_features.pkl") + tokenizer_type = type(tokenizer.tokenizer).__name__ + vocab_size = getattr(tokenizer, "vocab_size", 0) + features_pkl = os.path.join( + data_dir, "cached_{}_{}_{}_{}".format(filename, tokenizer_type, str(max_seq_length), str(vocab_size)), + ) if use_cache and os.path.exists(features_pkl): # If text_file was already processed, load from pickle @@ -275,17 +271,15 @@ def __init__( if len(punct_labels_lines) != len(text_lines): raise ValueError("Labels file should contain labels for every word") - if shuffle or num_samples > 0: - dataset = list(zip(text_lines, punct_labels_lines, capit_labels_lines)) - random.shuffle(dataset) + dataset = list(zip(text_lines, punct_labels_lines, capit_labels_lines)) - if num_samples > 0: - dataset = dataset[:num_samples] + if num_samples > 0: + dataset = dataset[:num_samples] - dataset = list(zip(*dataset)) - text_lines = dataset[0] - punct_labels_lines = dataset[1] - capit_labels_lines = dataset[2] + dataset = list(zip(*dataset)) + text_lines = dataset[0] + punct_labels_lines = dataset[1] + capit_labels_lines = dataset[2] # for dev/test sets use label mapping from training set if punct_label_ids: @@ -351,7 +345,7 @@ def get_stats_and_save(all_labels, label_ids, name): infold = text_file[: text_file.rfind('/')] merged_labels = itertools.chain.from_iterable(all_labels) logging.info('Three most popular labels') - _, label_frequencies = utils.get_label_stats(merged_labels, infold + '/label_count_' + name + '.tsv') + _, label_frequencies, _ = get_label_stats(merged_labels, infold + '/label_count_' + name + '.tsv') out = open(os.path.join(infold, name + '_label_ids.csv'), 'w') labels, _ = zip(*sorted(label_ids.items(), key=lambda x: x[1])) diff --git a/nemo/collections/nlp/data/datasets/qa_squad_dataset.py b/nemo/collections/nlp/data/datasets/qa_squad_dataset/qa_squad_dataset.py similarity index 56% rename from nemo/collections/nlp/data/datasets/qa_squad_dataset.py rename to nemo/collections/nlp/data/datasets/qa_squad_dataset/qa_squad_dataset.py index b927f83ead38..6e5a9720b883 100644 --- a/nemo/collections/nlp/data/datasets/qa_squad_dataset.py +++ b/nemo/collections/nlp/data/datasets/qa_squad_dataset/qa_squad_dataset.py @@ -1,20 +1,21 @@ -""" -Copyright 2018 The Google AI Language Team Authors and -The HuggingFace Inc. team. -Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# Copyright 2018 The Google AI Language Team Authors and +# The HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" import collections import json import os @@ -26,7 +27,9 @@ from tqdm import tqdm from nemo import logging -from nemo.collections.nlp.data.datasets.glue_benchmark_dataset import DataProcessor +from nemo.collections.nlp.data.datasets.datasets_utils.data_preprocessing import is_whitespace +from nemo.collections.nlp.data.datasets.datasets_utils.datasets_processing import DataProcessor +from nemo.collections.nlp.data.datasets.qa_squad_dataset.qa_squad_processing import convert_examples_to_features from nemo.collections.nlp.metrics.squad_metrics import ( _get_best_indexes, apply_no_ans_threshold, @@ -36,10 +39,9 @@ get_final_text, make_eval_dict, merge_eval, - normalize_answer, ) -from nemo.collections.nlp.utils.common_nlp_utils import _is_whitespace -from nemo.collections.nlp.utils.loss_utils import _compute_softmax +from nemo.collections.nlp.utils.data_utils import normalize_answer +from nemo.collections.nlp.utils.functional_utils import _compute_softmax __all__ = ['SquadDataset'] @@ -55,7 +57,7 @@ class SquadDataset(Dataset): Creates SQuAD dataset for Question Answering. Args: - data_file (str): train.*.json or dev.*.json. + data_file (str): train.*.json eval.*.json or test.*.json. tokenizer (obj): Tokenizer object, e.g. NemoBertTokenizer. version_2_with_negative (bool): True if training should allow unanswerable questions. @@ -67,70 +69,81 @@ class SquadDataset(Dataset): max_seq_length (int): All training files which have a duration more than max_duration are dropped. Can't be used if the `utt2dur` file does not exist. Defaults to None. - mode (str): Use "train" or "dev" to define between + mode (str): Use "train", "eval" or "test" to define between training and evaluation. 
""" def __init__( - self, data_file, tokenizer, doc_stride, max_query_length, max_seq_length, version_2_with_negative, mode + self, + data_file, + tokenizer, + doc_stride, + max_query_length, + max_seq_length, + version_2_with_negative, + mode, + use_cache, ): self.tokenizer = tokenizer self.version_2_with_negative = version_2_with_negative self.processor = SquadProcessor(data_file=data_file, mode=mode) self.mode = mode - if mode != "dev" and mode != "train": - raise ValueError(f"mode should be either 'train' or 'dev' but got {mode}") + if mode not in ["eval", "train", "test"]: + raise ValueError(f"mode should be either 'train', 'eval', or 'test' but got {mode}") self.examples = self.processor.get_examples() - if mode == "train": - cached_train_features_file = ( - data_file - + '_cache' - + '_{0}_{1}_{2}_{3}'.format(mode, str(max_seq_length), str(doc_stride), str(max_query_length)) + tokenizer_type = type(tokenizer.tokenizer).__name__ + vocab_size = getattr(tokenizer, "vocab_size", 0) + cached_features_file = ( + data_file + + '_cache' + + '_{}_{}_{}_{}_{}_{}'.format( + mode, tokenizer_type, str(vocab_size), str(max_seq_length), str(doc_stride), str(max_query_length) ) + ) - if os.path.exists(cached_train_features_file): - with open(cached_train_features_file, "rb") as reader: - self.features = pickle.load(reader) - else: - self.features = convert_examples_to_features( - examples=self.examples, - tokenizer=tokenizer, - max_seq_length=max_seq_length, - doc_stride=doc_stride, - max_query_length=max_query_length, - has_groundtruth=True, - ) - master_device = not torch.distributed.is_initialized() or torch.distributed.get_rank() == 0 - if master_device: - logging.info(" Saving train features into cached file %s", cached_train_features_file) - with open(cached_train_features_file, "wb") as writer: - pickle.dump(self.features, writer) - elif mode == "dev": + if use_cache and os.path.exists(cached_features_file): + logging.info(f"loading from {cached_features_file}") + with open(cached_features_file, "rb") as reader: + self.features = pickle.load(reader) + else: self.features = convert_examples_to_features( examples=self.examples, tokenizer=tokenizer, max_seq_length=max_seq_length, doc_stride=doc_stride, max_query_length=max_query_length, - has_groundtruth=True, + has_groundtruth=mode != "test", ) - else: - raise Exception + + if use_cache: + master_device = not torch.distributed.is_initialized() or torch.distributed.get_rank() == 0 + if master_device: + logging.info(" Saving train features into cached file %s", cached_features_file) + with open(cached_features_file, "wb") as writer: + pickle.dump(self.features, writer) def __len__(self): return len(self.features) def __getitem__(self, idx): feature = self.features[idx] - return ( - np.array(feature.input_ids), - np.array(feature.segment_ids), - np.array(feature.input_mask), - np.array(feature.start_position), - np.array(feature.end_position), - np.array(feature.unique_id), - ) + if self.mode == "test": + return ( + np.array(feature.input_ids), + np.array(feature.segment_ids), + np.array(feature.input_mask), + np.array(feature.unique_id), + ) + else: + return ( + np.array(feature.input_ids), + np.array(feature.segment_ids), + np.array(feature.input_mask), + np.array(feature.unique_id), + np.array(feature.start_position), + np.array(feature.end_position), + ) def get_predictions( self, @@ -296,8 +309,10 @@ def get_predictions( output = collections.OrderedDict() output["text"] = entry.text output["probability"] = probs[i] - output["start_logit"] = 
entry.start_logit - output["end_logit"] = entry.end_logit + output["start_logit"] = ( + entry.start_logit if isinstance(entry.start_logit, float) else list(entry.start_logit) + ) + output["end_logit"] = entry.end_logit if isinstance(entry.end_logit, float) else list(entry.end_logit) nbest_json.append(output) assert len(nbest_json) >= 1 @@ -313,7 +328,7 @@ def get_predictions( all_predictions[example.qas_id] = "" else: all_predictions[example.qas_id] = best_non_null_entry.text - all_nbest_json[example.qas_id] = nbest_json + all_nbest_json[example.qas_id] = nbest_json return all_predictions, all_nbest_json, scores_diff_json @@ -400,210 +415,7 @@ def evaluate( exact_match, f1 = self.evaluate_predictions(all_predictions) - return exact_match, f1, all_predictions - - -def convert_examples_to_features( - examples, tokenizer, max_seq_length, doc_stride, max_query_length, has_groundtruth, -): - """Loads a data file into a list of `InputBatch`s.""" - - unique_id = 1000000000 - - features = [] - for (example_index, example) in enumerate(examples): - query_tokens = tokenizer.text_to_tokens(example.question_text) - - if len(query_tokens) > max_query_length: - query_tokens = query_tokens[0:max_query_length] - - # context: index of token -> index of word - tok_to_orig_index = [] - # context: index of word -> index of first token in token list - orig_to_tok_index = [] - # context without white spaces after tokenization - all_doc_tokens = [] - # doc tokens is word separated context - for (i, token) in enumerate(example.doc_tokens): - orig_to_tok_index.append(len(all_doc_tokens)) - sub_tokens = tokenizer.text_to_tokens(token) - for sub_token in sub_tokens: - tok_to_orig_index.append(i) - all_doc_tokens.append(sub_token) - - # idx of query token start and end in context - tok_start_position = None - tok_end_position = None - if has_groundtruth and example.is_impossible: - tok_start_position = -1 - tok_end_position = -1 - if has_groundtruth and not example.is_impossible: - tok_start_position = orig_to_tok_index[example.start_position] - if example.end_position < len(example.doc_tokens) - 1: - tok_end_position = orig_to_tok_index[example.end_position + 1] - 1 - else: - tok_end_position = len(all_doc_tokens) - 1 - - (tok_start_position, tok_end_position) = _improve_answer_span( - all_doc_tokens, tok_start_position, tok_end_position, tokenizer, example.answer_text - ) - - # The -3 accounts for tokenizer.cls_token, tokenizer.sep_token and tokenizer.eos_token - # doc_spans contains all possible contexts options of given length - max_tokens_for_doc = max_seq_length - len(query_tokens) - 3 - _DocSpan = collections.namedtuple("DocSpan", ["start", "length"]) - doc_spans = [] - start_offset = 0 - while start_offset < len(all_doc_tokens): - length = len(all_doc_tokens) - start_offset - if length > max_tokens_for_doc: - length = max_tokens_for_doc - doc_spans.append(_DocSpan(start=start_offset, length=length)) - if start_offset + length == len(all_doc_tokens): - break - start_offset += min(length, doc_stride) - - for (doc_span_index, doc_span) in enumerate(doc_spans): - tokens = [] - # maps context tokens idx in final input -> word idx in context - token_to_orig_map = {} - token_is_max_context = {} - segment_ids = [] - tokens.append(tokenizer.bos_token) - segment_ids.append(0) - for token in query_tokens: - tokens.append(token) - segment_ids.append(0) - tokens.append(tokenizer.sep_token) - segment_ids.append(0) - - for i in range(doc_span.length): - split_token_index = doc_span.start + i - token_to_orig_map[len(tokens)] 
= tok_to_orig_index[split_token_index] - - is_max_context = _check_is_max_context(doc_spans, doc_span_index, split_token_index) - token_is_max_context[len(tokens)] = is_max_context - tokens.append(all_doc_tokens[split_token_index]) - segment_ids.append(1) - tokens.append(tokenizer.eos_token) - segment_ids.append(1) - - input_ids = tokenizer.tokens_to_ids(tokens) - - # The mask has 1 for real tokens and 0 for padding tokens. - # Only real tokens are attended to. - input_mask = [1] * len(input_ids) - - # Zero-pad up to the sequence length. - while len(input_ids) < max_seq_length: - input_ids.append(tokenizer.pad_id) - input_mask.append(0) - segment_ids.append(0) - - assert len(input_ids) == max_seq_length - assert len(input_mask) == max_seq_length - assert len(segment_ids) == max_seq_length - - # calculate start and end position in final array - # of tokens in answer if no answer, - # 0 for both pointing to tokenizer.cls_token - start_position = None - end_position = None - if has_groundtruth and not example.is_impossible: - doc_start = doc_span.start - doc_end = doc_span.start + doc_span.length - 1 - out_of_span = False - if not (tok_start_position >= doc_start and tok_end_position <= doc_end): - out_of_span = True - if out_of_span: - start_position = 0 - end_position = 0 - else: - doc_offset = len(query_tokens) + 2 - start_position = tok_start_position - doc_start + doc_offset - end_position = tok_end_position - doc_start + doc_offset - if has_groundtruth and example.is_impossible: - # if our document chunk does not contain - # an annotation we throw it out, since there is nothing - # to predict. - start_position = 0 - end_position = 0 - - if example_index < 1: - logging.info("*** Example ***") - logging.info("unique_id: %s" % (unique_id)) - logging.info("example_index: %s" % (example_index)) - logging.info("doc_span_index: %s" % (doc_span_index)) - logging.info("tokens: %s" % " ".join(tokens)) - logging.info( - "token_to_orig_map: %s" % " ".join(["%d:%d" % (x, y) for (x, y) in token_to_orig_map.items()]) - ) - logging.info( - "token_is_max_context: %s" - % " ".join(["%d:%s" % (x, y) for (x, y) in token_is_max_context.items()]) - ) - logging.info("input_ids: %s" % " ".join([str(x) for x in input_ids])) - logging.info("input_mask: %s" % " ".join([str(x) for x in input_mask])) - logging.info("segment_ids: %s" % " ".join([str(x) for x in segment_ids])) - if has_groundtruth and example.is_impossible: - logging.info("impossible example") - if has_groundtruth and not example.is_impossible: - answer_text = " ".join(tokens[start_position : (end_position + 1)]) - logging.info("start_position: %d" % (start_position)) - logging.info("end_position: %d" % (end_position)) - logging.info("answer: %s" % (answer_text)) - - features.append( - InputFeatures( - unique_id=unique_id, - example_index=example_index, - doc_span_index=doc_span_index, - tokens=tokens, - token_to_orig_map=token_to_orig_map, - token_is_max_context=token_is_max_context, - input_ids=input_ids, - input_mask=input_mask, - segment_ids=segment_ids, - start_position=start_position, - end_position=end_position, - is_impossible=example.is_impossible, - ) - ) - unique_id += 1 - - return features - - -class InputFeatures(object): - """A single set of features of data.""" - - def __init__( - self, - unique_id, - example_index, - doc_span_index, - tokens, - token_to_orig_map, - token_is_max_context, - input_ids, - input_mask, - segment_ids, - start_position=None, - end_position=None, - is_impossible=None, - ): - self.unique_id = unique_id - 
self.example_index = example_index - self.doc_span_index = doc_span_index - self.tokens = tokens - self.token_to_orig_map = token_to_orig_map - self.token_is_max_context = token_is_max_context - self.input_ids = input_ids - self.input_mask = input_mask - self.segment_ids = segment_ids - self.start_position = start_position - self.end_position = end_position - self.is_impossible = is_impossible + return exact_match, f1, all_predictions, all_nbest_json class SquadProcessor(DataProcessor): @@ -646,11 +458,11 @@ def _create_examples(self, input_data, set_type): is_impossible = False if not is_impossible: - if set_type == "train" or set_type == "dev": + if set_type == "train" or set_type == "eval": answer = qa["answers"][0] answer_text = answer["text"] start_position_character = answer["answer_start"] - if set_type == "dev": + if set_type == "eval": answers = qa["answers"] example = SquadExample( @@ -716,7 +528,7 @@ def __init__( # char_to_word_offset = [0, 0, 0, 1, 1] # doc_tokens = ["hi", "yo"] for c in self.context_text: - if _is_whitespace(c): + if is_whitespace(c): prev_is_whitespace = True else: if prev_is_whitespace: @@ -736,79 +548,3 @@ def __init__( self.end_position = char_to_word_offset[ min(start_position_character + len(answer_text) - 1, len(char_to_word_offset) - 1) ] - - -def _improve_answer_span(doc_tokens, input_start, input_end, tokenizer, orig_answer_text): - """Returns tokenized answer spans that - better match the annotated answer.""" - tok_answer_text = " ".join(tokenizer.text_to_tokens(orig_answer_text)) - - for new_start in range(input_start, input_end + 1): - for new_end in range(input_end, new_start - 1, -1): - text_span = " ".join(doc_tokens[new_start : (new_end + 1)]) - if text_span == tok_answer_text: - return (new_start, new_end) - - return (input_start, input_end) - - -def _check_is_max_context(doc_spans, cur_span_index, position): - """Check if this is the 'max context' doc span for the token.""" - best_score = None - best_span_index = None - for (span_index, doc_span) in enumerate(doc_spans): - end = doc_span.start + doc_span.length - 1 - if position < doc_span.start: - continue - if position > end: - continue - num_left_context = position - doc_span.start - num_right_context = end - position - score = min(num_left_context, num_right_context) + 0.01 * doc_span.length - if best_score is None or score > best_score: - best_score = score - best_span_index = span_index - - return cur_span_index == best_span_index - - -def check_is_max_context(doc_spans, cur_span_index, position): - """Check if this is the 'max context' doc span for the token. - - Because of the sliding window approach taken to scoring documents, - a single token can appear in multiple documents. - - Example: - Doc: the man went to the store and bought a gallon of milk - Span A: the man went to the - Span B: to the store and bought - Span C: and bought a gallon of - ... - - Now the word 'bought' will have two scores from spans B and C. We only - want to consider the score with "maximum context", which we define as - the *minimum* of its left and right context (the *sum* of left and - right context will always be the same, of course). - - In the example the maximum context for 'bought' would be span C since - it has 1 left context and 3 right context, while span B has 4 left context - and 0 right context. - - Code adapted from the code by the Google AI and HuggingFace. 
- """ - best_score = None - best_span_index = None - for (span_index, doc_span) in enumerate(doc_spans): - end = doc_span.start + doc_span.length - 1 - if position < doc_span.start: - continue - if position > end: - continue - num_left_context = position - doc_span.start - num_right_context = end - position - score = min(num_left_context, num_right_context) + 0.01 * doc_span.length - if best_score is None or score > best_score: - best_score = score - best_span_index = span_index - - return cur_span_index == best_span_index diff --git a/nemo/collections/nlp/data/datasets/qa_squad_dataset/qa_squad_processing.py b/nemo/collections/nlp/data/datasets/qa_squad_dataset/qa_squad_processing.py new file mode 100644 index 000000000000..22d0947e05e6 --- /dev/null +++ b/nemo/collections/nlp/data/datasets/qa_squad_dataset/qa_squad_processing.py @@ -0,0 +1,280 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# Copyright 2018 The Google AI Language Team Authors and +# The HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +import collections + +from nemo import logging + + +def convert_examples_to_features( + examples, tokenizer, max_seq_length, doc_stride, max_query_length, has_groundtruth, +): + """Loads a data file into a list of `InputBatch`s.""" + + unique_id = 1000000000 + + features = [] + for (example_index, example) in enumerate(examples): + query_tokens = tokenizer.text_to_tokens(example.question_text) + + if len(query_tokens) > max_query_length: + query_tokens = query_tokens[0:max_query_length] + + # context: index of token -> index of word + tok_to_orig_index = [] + # context: index of word -> index of first token in token list + orig_to_tok_index = [] + # context without white spaces after tokenization + all_doc_tokens = [] + # doc tokens is word separated context + for (i, token) in enumerate(example.doc_tokens): + orig_to_tok_index.append(len(all_doc_tokens)) + sub_tokens = tokenizer.text_to_tokens(token) + for sub_token in sub_tokens: + tok_to_orig_index.append(i) + all_doc_tokens.append(sub_token) + + # idx of query token start and end in context + tok_start_position = None + tok_end_position = None + if has_groundtruth and example.is_impossible: + tok_start_position = -1 + tok_end_position = -1 + if has_groundtruth and not example.is_impossible: + tok_start_position = orig_to_tok_index[example.start_position] + if example.end_position < len(example.doc_tokens) - 1: + tok_end_position = orig_to_tok_index[example.end_position + 1] - 1 + else: + tok_end_position = len(all_doc_tokens) - 1 + + (tok_start_position, tok_end_position) = _improve_answer_span( + all_doc_tokens, tok_start_position, tok_end_position, tokenizer, example.answer_text + ) + + # The -3 accounts for tokenizer.cls_token, tokenizer.sep_token and tokenizer.eos_token + # doc_spans contains all possible contexts options of given length + 
max_tokens_for_doc = max_seq_length - len(query_tokens) - 3 + _DocSpan = collections.namedtuple("DocSpan", ["start", "length"]) + doc_spans = [] + start_offset = 0 + while start_offset < len(all_doc_tokens): + length = len(all_doc_tokens) - start_offset + if length > max_tokens_for_doc: + length = max_tokens_for_doc + doc_spans.append(_DocSpan(start=start_offset, length=length)) + if start_offset + length == len(all_doc_tokens): + break + start_offset += min(length, doc_stride) + + for (doc_span_index, doc_span) in enumerate(doc_spans): + tokens = [] + # maps context tokens idx in final input -> word idx in context + token_to_orig_map = {} + token_is_max_context = {} + segment_ids = [] + tokens.append(tokenizer.bos_token) + segment_ids.append(0) + for token in query_tokens: + tokens.append(token) + segment_ids.append(0) + tokens.append(tokenizer.sep_token) + segment_ids.append(0) + + for i in range(doc_span.length): + split_token_index = doc_span.start + i + token_to_orig_map[len(tokens)] = tok_to_orig_index[split_token_index] + + is_max_context = _check_is_max_context(doc_spans, doc_span_index, split_token_index) + token_is_max_context[len(tokens)] = is_max_context + tokens.append(all_doc_tokens[split_token_index]) + segment_ids.append(1) + tokens.append(tokenizer.eos_token) + segment_ids.append(1) + + input_ids = tokenizer.tokens_to_ids(tokens) + + # The mask has 1 for real tokens and 0 for padding tokens. + # Only real tokens are attended to. + input_mask = [1] * len(input_ids) + + # Zero-pad up to the sequence length. + while len(input_ids) < max_seq_length: + input_ids.append(tokenizer.pad_id) + input_mask.append(0) + segment_ids.append(0) + + assert len(input_ids) == max_seq_length + assert len(input_mask) == max_seq_length + assert len(segment_ids) == max_seq_length + + # calculate start and end position in final array + # of tokens in answer if no answer, + # 0 for both pointing to tokenizer.cls_token + start_position = None + end_position = None + if has_groundtruth and not example.is_impossible: + doc_start = doc_span.start + doc_end = doc_span.start + doc_span.length - 1 + out_of_span = False + if not (tok_start_position >= doc_start and tok_end_position <= doc_end): + out_of_span = True + if out_of_span: + start_position = 0 + end_position = 0 + else: + doc_offset = len(query_tokens) + 2 + start_position = tok_start_position - doc_start + doc_offset + end_position = tok_end_position - doc_start + doc_offset + if has_groundtruth and example.is_impossible: + # if our document chunk does not contain + # an annotation we throw it out, since there is nothing + # to predict. 
+ start_position = 0 + end_position = 0 + + if example_index < 1: + logging.info("*** Example ***") + logging.info("unique_id: %s" % (unique_id)) + logging.info("example_index: %s" % (example_index)) + logging.info("doc_span_index: %s" % (doc_span_index)) + logging.info("tokens: %s" % " ".join(tokens)) + logging.info( + "token_to_orig_map: %s" % " ".join(["%d:%d" % (x, y) for (x, y) in token_to_orig_map.items()]) + ) + logging.info( + "token_is_max_context: %s" + % " ".join(["%d:%s" % (x, y) for (x, y) in token_is_max_context.items()]) + ) + logging.info("input_ids: %s" % " ".join([str(x) for x in input_ids])) + logging.info("input_mask: %s" % " ".join([str(x) for x in input_mask])) + logging.info("segment_ids: %s" % " ".join([str(x) for x in segment_ids])) + if has_groundtruth and example.is_impossible: + logging.info("impossible example") + if has_groundtruth and not example.is_impossible: + answer_text = " ".join(tokens[start_position : (end_position + 1)]) + logging.info("start_position: %d" % (start_position)) + logging.info("end_position: %d" % (end_position)) + logging.info("answer: %s" % (answer_text)) + + features.append( + InputFeatures( + unique_id=unique_id, + example_index=example_index, + doc_span_index=doc_span_index, + tokens=tokens, + token_to_orig_map=token_to_orig_map, + token_is_max_context=token_is_max_context, + input_ids=input_ids, + input_mask=input_mask, + segment_ids=segment_ids, + start_position=start_position, + end_position=end_position, + is_impossible=example.is_impossible, + ) + ) + unique_id += 1 + + return features + + +def _improve_answer_span(doc_tokens, input_start, input_end, tokenizer, orig_answer_text): + """Returns tokenized answer spans that + better match the annotated answer.""" + tok_answer_text = " ".join(tokenizer.text_to_tokens(orig_answer_text)) + + for new_start in range(input_start, input_end + 1): + for new_end in range(input_end, new_start - 1, -1): + text_span = " ".join(doc_tokens[new_start : (new_end + 1)]) + if text_span == tok_answer_text: + return (new_start, new_end) + + return (input_start, input_end) + + +def _check_is_max_context(doc_spans, cur_span_index, position): + """Check if this is the 'max context' doc span for the token. + + Because of the sliding window approach taken to scoring documents, + a single token can appear in multiple documents. + + Example: + Doc: the man went to the store and bought a gallon of milk + Span A: the man went to the + Span B: to the store and bought + Span C: and bought a gallon of + ... + + Now the word 'bought' will have two scores from spans B and C. We only + want to consider the score with "maximum context", which we define as + the *minimum* of its left and right context (the *sum* of left and + right context will always be the same, of course). + + In the example the maximum context for 'bought' would be span C since + it has 1 left context and 3 right context, while span B has 4 left context + and 0 right context. + + Code adapted from the code by the Google AI and HuggingFace. 
+ """ + best_score = None + best_span_index = None + for (span_index, doc_span) in enumerate(doc_spans): + end = doc_span.start + doc_span.length - 1 + if position < doc_span.start: + continue + if position > end: + continue + num_left_context = position - doc_span.start + num_right_context = end - position + score = min(num_left_context, num_right_context) + 0.01 * doc_span.length + if best_score is None or score > best_score: + best_score = score + best_span_index = span_index + + return cur_span_index == best_span_index + + +class InputFeatures(object): + """A single set of features of data.""" + + def __init__( + self, + unique_id, + example_index, + doc_span_index, + tokens, + token_to_orig_map, + token_is_max_context, + input_ids, + input_mask, + segment_ids, + start_position=None, + end_position=None, + is_impossible=None, + ): + self.unique_id = unique_id + self.example_index = example_index + self.doc_span_index = doc_span_index + self.tokens = tokens + self.token_to_orig_map = token_to_orig_map + self.token_is_max_context = token_is_max_context + self.input_ids = input_ids + self.input_mask = input_mask + self.segment_ids = segment_ids + self.start_position = start_position + self.end_position = end_position + self.is_impossible = is_impossible diff --git a/nemo/collections/nlp/data/datasets/sgd_dataset/data_processor.py b/nemo/collections/nlp/data/datasets/sgd_dataset/data_processor.py new file mode 100644 index 000000000000..fa59ad148f04 --- /dev/null +++ b/nemo/collections/nlp/data/datasets/sgd_dataset/data_processor.py @@ -0,0 +1,418 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# Copyright 2019 The Google Research Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +""" +This file contains code artifacts adapted from the original implementation: +https://github.com/google-research/google-research/blob/master/schema_guided_dst/baseline/data_utils.py +""" + +import json +import os +import re + +import numpy as np +import torch + +from nemo.collections.nlp.data.datasets.sgd_dataset.input_example import InputExample +from nemo.utils import logging + +__all__ = ['FILE_RANGES', 'PER_FRAME_OUTPUT_FILENAME', 'SGDDataProcessor', 'get_dialogue_files'] + + +FILE_RANGES = { + "sgd_single_domain": {"train": range(1, 44), "dev": range(1, 8), "test": range(1, 12)}, + "sgd_multi_domain": {"train": range(44, 128), "dev": range(8, 21), "test": range(12, 35)}, + "sgd_all": {"train": range(1, 128), "dev": range(1, 21), "test": range(1, 35)}, + "multiwoz": {"train": range(1, 18), "dev": range(1, 3), "test": range(1, 3)}, + "debug_sample": {"train": range(1, 2), "dev": range(1, 2), "test": range(1, 2)}, +} + +# Name of the file containing all predictions and their corresponding frame metrics. 
+PER_FRAME_OUTPUT_FILENAME = "dialogues_and_metrics.json"
+
+
+class SGDDataProcessor(object):
+    """Data generator for SGD dialogues."""
+
+    def __init__(
+        self, task_name, data_dir, dialogues_example_dir, tokenizer, schema_emb_processor, overwrite_dial_files=False,
+    ):
+        """
+        Constructs SGDDataProcessor
+        Args:
+            task_name (str): task name, for example, "single_domain"
+            data_dir (str): path to data directory
+            dialogues_example_dir (str): path to store processed dialogue examples
+            tokenizer (Tokenizer): such as NemoBertTokenizer
+            schema_emb_processor (Obj): contains information about schemas
+            overwrite_dial_files (bool): whether to overwrite dialogue files
+        """
+        self.data_dir = data_dir
+        self.dialogues_examples_dir = dialogues_example_dir
+
+        self._task_name = task_name
+        self.schema_config = schema_emb_processor.schema_config
+
+        train_file_range = FILE_RANGES[task_name]["train"]
+        dev_file_range = FILE_RANGES[task_name]["dev"]
+        test_file_range = FILE_RANGES[task_name]["test"]
+
+        self._file_ranges = {
+            "train": train_file_range,
+            "dev": dev_file_range,
+            "test": test_file_range,
+        }
+
+        self._seen_services = {
+            "train": set(),
+            "dev": set(),
+            "test": set(),
+        }
+
+        self._tokenizer = tokenizer
+        self._max_seq_length = self.schema_config["MAX_SEQ_LENGTH"]
+
+        self.dial_files = {}
+
+        for dataset in ["train", "dev", "test"]:
+            # Process dialogue files
+            dial_file = f"{task_name}_{dataset}_examples.processed"
+            dial_file = os.path.join(dialogues_example_dir, dial_file)
+            self.dial_files[(task_name, dataset)] = dial_file
+
+            dialog_paths = SGDDataProcessor.get_dialogue_files(data_dir, dataset, task_name)
+            dialogs = SGDDataProcessor.load_dialogues(dialog_paths)
+            for dialog in dialogs:
+                self._seen_services[dataset].update(set(dialog['services']))
+
+            if not os.path.exists(dial_file) or overwrite_dial_files:
+                logging.debug(f"Start generating the dialogue examples for {dataset} dataset.")
+                master_device = not torch.distributed.is_initialized() or torch.distributed.get_rank() == 0
+                if master_device:
+                    if not os.path.exists(dialogues_example_dir):
+                        os.makedirs(dialogues_example_dir)
+                    dial_examples = self._generate_dialog_examples(dataset, schema_emb_processor.schemas)
+                    with open(dial_file, "wb") as f:
+                        np.save(f, dial_examples)
+                        f.close()
+                    logging.debug(f"The dialogue examples for {dataset} dataset saved at {dial_file}")
+                logging.debug(f"Finish generating the dialogue examples for {dataset} dataset.")
+
+            # wait until the master process writes to the dialogue processed file
+            if torch.distributed.is_initialized():
+                torch.distributed.barrier()
+
+    def get_dialog_examples(self, dataset):
+        """
+        Returns a list of `InputExample`s of the data splits' dialogues.
+        Args:
+            dataset(str): can be "train", "dev", or "test".
+        Returns:
+            examples: a list of `InputExample`s.
+        """
+        if (self._task_name, dataset) not in self.dial_files or not os.path.exists(
+            self.dial_files[(self._task_name, dataset)]
+        ):
+            raise ValueError(
+                f"{dataset} dialogue examples were not processed for {self._task_name} task. Re-initialize SGDDataProcessor and add {dataset} dataset to datasets arg."
+ ) + dial_file = self.dial_files[(self._task_name, dataset)] + logging.info(f"Loading dialogue examples from {dial_file}.") + with open(dial_file, "rb") as f: + dial_examples = np.load(f, allow_pickle=True) + f.close() + return dial_examples + + def get_seen_services(self, dataset_split): + return self._seen_services[dataset_split] + + def _generate_dialog_examples(self, dataset, schemas): + """ + Returns a list of `InputExample`s of the data splits' dialogues. + Args: + dataset(str): can be "train", "dev", or "test". + schemas(Schema): for all services and all datasets processed by the schema_processor + Returns: + examples: a list of `InputExample`s. + """ + logging.info(f'Creating examples from the dialogues started...') + dialog_paths = [ + os.path.join(self.data_dir, dataset, "dialogues_{:03d}.json".format(i)) for i in self._file_ranges[dataset] + ] + dialogs = SGDDataProcessor.load_dialogues(dialog_paths) + + examples = [] + for dialog_idx, dialog in enumerate(dialogs): + if dialog_idx % 1000 == 0: + logging.info(f'Processed {dialog_idx} dialogs.') + examples.extend(self._create_examples_from_dialog(dialog, schemas, dataset)) + + logging.info(f'Finished creating the examples from {len(dialogs)} dialogues.') + return examples + + def _create_examples_from_dialog(self, dialog, schemas, dataset): + """ + Create examples for every turn in the dialog. + Args: + dialog (dict): dialogue example + schemas(Schema): for all services and all datasets processed by the schema_processor + dataset(str): can be "train", "dev", or "test". + Returns: + examples: a list of `InputExample`s. + """ + dialog_id = dialog["dialogue_id"] + prev_states = {} + examples = [] + for turn_idx, turn in enumerate(dialog["turns"]): + # Generate an example for every frame in every user turn. + if turn["speaker"] == "USER": + user_utterance = turn["utterance"] + user_frames = {f["service"]: f for f in turn["frames"]} + if turn_idx > 0: + system_turn = dialog["turns"][turn_idx - 1] + system_utterance = system_turn["utterance"] + system_frames = {f["service"]: f for f in system_turn["frames"]} + else: + system_utterance = "" + system_frames = {} + + turn_id = "{}-{}-{:02d}".format(dataset, dialog_id, turn_idx) + turn_examples, prev_states = self._create_examples_from_turn( + turn_id, system_utterance, user_utterance, system_frames, user_frames, prev_states, schemas + ) + examples.extend(turn_examples) + return examples + + def _get_state_update(self, current_state, prev_state): + """ + Updates dialogue state + Args: + current_state (dict): dict of slot - slot values pairs for the current dialogue turn + prev_state (dict): dict of slot - slot values pairs for the previous dialogue turns + Returns: + state_update (dict): dict of slot - slot values pairs that very added/updated during the current + dialogue turn + """ + state_update = dict(current_state) + for slot, values in current_state.items(): + if slot in prev_state and prev_state[slot][0] in values: + # Remove the slot from state if its value didn't change. + state_update.pop(slot) + return state_update + + def _create_examples_from_turn( + self, turn_id, system_utterance, user_utterance, system_frames, user_frames, prev_states, schemas + ): + """ + Creates an example for each frame in the user turn. 
+ Args: + turn_id (int): turn number + system_utterance (str): last system utterance + user_utterance (str): lst user utterance + system_frames (dict): all system utterances and slot - slot value pairs + user_frames (dict): all user utterances and slot - slot value pairs + prev_states (dict): slot - slot value pairs from the previous turns + schemas (obj): carries information about the service from the current turn + Returns: + examples: a list of `InputExample`s. + prev_states (dict): updated dialogue state + """ + system_tokens, system_alignments, system_inv_alignments = self._tokenize(system_utterance) + user_tokens, user_alignments, user_inv_alignments = self._tokenize(user_utterance) + states = {} + base_example = InputExample(schema_config=self.schema_config, is_real_example=True, tokenizer=self._tokenizer,) + base_example.example_id = turn_id + + _, dialog_id, turn_id_ = turn_id.split('-') + dialog_id_1, dialog_id_2 = dialog_id.split('_') + base_example.example_id_num = [int(dialog_id_1), int(dialog_id_2), int(turn_id_)] + base_example.add_utterance_features( + system_tokens, system_inv_alignments, user_tokens, user_inv_alignments, system_utterance, user_utterance + ) + examples = [] + for service, user_frame in user_frames.items(): + # Create an example for this service. + example = base_example.make_copy_with_utterance_features() + + example.example_id = "{}-{}".format(turn_id, service) + _, dialog_id, turn_id_ = turn_id.split('-') + dialog_id_1, dialog_id_2 = dialog_id.split('_') + example.example_id_num = [ + int(dialog_id_1), + int(dialog_id_2), + int(turn_id_), + schemas.get_service_id(service), + ] + + example.service_schema = schemas.get_service_schema(service) + system_frame = system_frames.get(service, None) + state = user_frame["state"]["slot_values"] + state_update = self._get_state_update(state, prev_states.get(service, {})) + states[service] = state + # Populate features in the example. + example.add_categorical_slots(state_update) + # The input tokens to bert are in the format [CLS] [S1] [S2] ... [SEP] + # [U1] [U2] ... [SEP] [PAD] ... [PAD]. For system token indices a bias of + # 1 is added for the [CLS] token and for user tokens a bias of 2 + + # len(system_tokens) is added to account for [CLS], system tokens and + # [SEP]. + user_span_boundaries = self._find_subword_indices( + state_update, user_utterance, user_frame["slots"], user_alignments, user_tokens, 2 + len(system_tokens) + ) + if system_frame is not None: + system_span_boundaries = self._find_subword_indices( + state_update, system_utterance, system_frame["slots"], system_alignments, system_tokens, 1 + ) + else: + system_span_boundaries = {} + example.add_noncategorical_slots(state_update, user_span_boundaries, system_span_boundaries) + example.add_requested_slots(user_frame) + example.add_intents(user_frame) + examples.append(example) + return examples, states + + def _find_subword_indices(self, slot_values, utterance, char_slot_spans, alignments, subwords, bias): + """Find indices for subwords corresponding to slot values.""" + span_boundaries = {} + for slot, values in slot_values.items(): + # Get all values present in the utterance for the specified slot. 
+            value_char_spans = {}
+            for slot_span in char_slot_spans:
+                if slot_span["slot"] == slot:
+                    value = utterance[slot_span["start"] : slot_span["exclusive_end"]]
+                    start_tok_idx = alignments[slot_span["start"]]
+                    end_tok_idx = alignments[slot_span["exclusive_end"] - 1]
+                    if 0 <= start_tok_idx < len(subwords):
+                        end_tok_idx = min(end_tok_idx, len(subwords) - 1)
+                        value_char_spans[value] = (start_tok_idx + bias, end_tok_idx + bias)
+            for v in values:
+                if v in value_char_spans:
+                    span_boundaries[slot] = value_char_spans[v]
+                    break
+        return span_boundaries
+
+    def _tokenize(self, utterance):
+        """Tokenize the utterance using word-piece tokenization used by BERT.
+
+        Args:
+          utterance: A string containing the utterance to be tokenized.
+
+        Returns:
+          bert_tokens: A list of tokens obtained by word-piece tokenization of the
+            utterance.
+          alignments: A dict mapping indices of characters corresponding to start
+            and end positions of words (not subwords) to corresponding indices in
+            bert_tokens list.
+          inverse_alignments: A list of size equal to bert_tokens. Each element is a
+            tuple containing the index of the starting and inclusive ending
+            character of the word corresponding to the subword. This list is used
+            during inference to map word-piece indices to spans in the original
+            utterance.
+        """
+        # utterance = tokenization.convert_to_unicode(utterance)
+
+        # After _naive_tokenize, spaces and punctuation marks are all retained, i.e.
+        # direct concatenation of all the tokens in the sequence will be the
+        # original string.
+        tokens = SGDDataProcessor._naive_tokenize(utterance)
+        # Filter out empty tokens and obtain aligned character index for each token.
+        alignments = {}
+        char_index = 0
+        bert_tokens = []
+        # These lists store inverse alignments to be used during inference.
+        bert_tokens_start_chars = []
+        bert_tokens_end_chars = []
+        for token in tokens:
+            if token.strip():
+                subwords = self._tokenizer.text_to_tokens(token)
+                # Store the alignment for the index of starting character and the
+                # inclusive ending character of the token.
+                alignments[char_index] = len(bert_tokens)
+                bert_tokens_start_chars.extend([char_index] * len(subwords))
+                bert_tokens.extend(subwords)
+                # The inclusive ending character index corresponding to the word.
+                inclusive_char_end = char_index + len(token) - 1
+                alignments[inclusive_char_end] = len(bert_tokens) - 1
+                bert_tokens_end_chars.extend([inclusive_char_end] * len(subwords))
+            char_index += len(token)
+        inverse_alignments = list(zip(bert_tokens_start_chars, bert_tokens_end_chars))
+        return bert_tokens, alignments, inverse_alignments
+
+    def get_num_dialog_examples(self, dataset):
+        """
+        Gets the number of dialog examples in the data split.
+        Args:
+            dataset: str. can be "train", "dev", or "test".
+        Returns:
+            example_count: int. number of examples in the specified dataset.
+        """
+        example_count = 0
+        dialog_paths = [
+            os.path.join(self.data_dir, dataset, "dialogues_{:03d}.json".format(i)) for i in self._file_ranges[dataset]
+        ]
+        dst_set = SGDDataProcessor.load_dialogues(dialog_paths)
+        for dialog in dst_set:
+            for turn in dialog["turns"]:
+                if turn["speaker"] == "USER":
+                    example_count += len(turn["frames"])
+        return example_count
+
+    @classmethod
+    def _naive_tokenize(cls, s):
+        """
+        Tokenizes a string, separating words, spaces and punctuations.
+ Args: + s (str): a string + Returns: + seq_tok (list): list of words, spaces and punctuations from the s + """ + # Spaces and punctuation marks are all retained, i.e. direct concatenation + # of all the tokens in the sequence will be the original string. + seq_tok = [tok for tok in re.split(r"([^a-zA-Z0-9])", s) if tok] + return seq_tok + + @classmethod + def load_dialogues(cls, dialog_json_filepaths): + """ + Obtain the list of all dialogues from specified json files. + Args: + dialog_json_filepaths (list): list of json files + Returns: + dialogs (list): the list of all dialogues + """ + dialogs = [] + for dialog_json_filepath in sorted(dialog_json_filepaths): + with open(dialog_json_filepath, 'r') as f: + dialogs.extend(json.load(f)) + f.close() + return dialogs + + @classmethod + def get_dialogue_files(cls, data_dir, dataset_split, task_name): + """ + Obtain the list of all dialogue json files + Args: + data_dir (str): path to the data folde + dataset_split (str): dev, test or train + task_name (str): SGD task name, see keys of the FILE_RANGES + Returns: + dialogs (list): the list of all dialogue json files paths + """ + return [ + os.path.join(data_dir, dataset_split, 'dialogues_{:03d}.json'.format(fid)) + for fid in FILE_RANGES[task_name][dataset_split] + ] diff --git a/nemo/collections/nlp/data/datasets/sgd_dataset/evaluate.py b/nemo/collections/nlp/data/datasets/sgd_dataset/evaluate.py new file mode 100644 index 000000000000..d1d18e168419 --- /dev/null +++ b/nemo/collections/nlp/data/datasets/sgd_dataset/evaluate.py @@ -0,0 +1,219 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# Copyright 2019 The Google Research Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +""" +Evaluate predictions JSON file, w.r.t. ground truth file. +This file contains code artifacts adapted from the original implementation: +https://github.com/google-research/google-research/blob/master/schema_guided_dst/evaluate.py +""" + +import collections +import glob +import json + +import numpy as np + +import nemo.collections.nlp.data.datasets.sgd_dataset.metrics as metrics +from nemo.utils import logging + +__all__ = [ + 'get_in_domain_services', + 'get_dataset_as_dict', + 'ALL_SERVICES', + 'SEEN_SERVICES', + 'UNSEEN_SERVICES', + 'get_metrics', + 'PER_FRAME_OUTPUT_FILENAME', +] + +ALL_SERVICES = "#ALL_SERVICES" +SEEN_SERVICES = "#SEEN_SERVICES" +UNSEEN_SERVICES = "#UNSEEN_SERVICES" + +# Name of the file containing all predictions and their corresponding frame metrics. 
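# --- Editor's note: illustrative usage sketch, not part of the patch ----------
# The helpers defined below are typically composed along these lines (paths and
# the service_schemas mapping are hypothetical; only the function names and
# signatures come from this file):
#
#   in_domain = get_in_domain_services(
#       "/data/sgd/dev/schema.json",                       # schema of the evaluated split
#       get_service_set("/data/sgd/train/schema.json"),    # services seen in training
#   )
#   refs = get_dataset_as_dict("/data/sgd/dev/dialogues_*.json")
#   hyps = get_dataset_as_dict("/work/predictions/dialogues_*.json")
#   aggregate, per_frame = get_metrics(
#       refs, hyps, service_schemas, in_domain,
#       joint_acc_across_turn=False, no_fuzzy_match=False,
#   )
#   # aggregate["#ALL_SERVICES"] then holds the macro-averaged metrics in percent.
# -------------------------------------------------------------------------------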
+PER_FRAME_OUTPUT_FILENAME = "dialogues_and_metrics.json" + + +def get_service_set(schema_path): + """Get the set of all services present in a schema.""" + service_set = set() + with open(schema_path) as f: + schema = json.load(f) + for service in schema: + service_set.add(service["service_name"]) + f.close() + return service_set + + +def get_in_domain_services(schema_path, service_set): + """Get the set of common services between a schema and set of services. + Args: + schema_path (str): path to schema file + service_set (set): set of services + """ + return get_service_set(schema_path) & service_set + + +def get_dataset_as_dict(file_path_patterns): + """Read the DSTC8/SGD json dialogue data as dictionary with dialog ID as keys.""" + dataset_dict = {} + if isinstance(file_path_patterns, list): + list_fp = file_path_patterns + else: + list_fp = sorted(glob.glob(file_path_patterns)) + for fp in list_fp: + if PER_FRAME_OUTPUT_FILENAME in fp: + continue + logging.debug("Loading file: %s", fp) + with open(fp) as f: + data = json.load(f) + if isinstance(data, list): + for dial in data: + dataset_dict[dial["dialogue_id"]] = dial + elif isinstance(data, dict): + dataset_dict.update(data) + f.close() + return dataset_dict + + +def get_metrics(dataset_ref, dataset_hyp, service_schemas, in_domain_services, joint_acc_across_turn, no_fuzzy_match): + """Calculate the DSTC8/SGD metrics. + + Args: + dataset_ref: The ground truth dataset represented as a dict mapping dialogue + id to the corresponding dialogue. + dataset_hyp: The predictions in the same format as `dataset_ref`. + service_schemas: A dict mapping service name to the schema for the service. + in_domain_services: The set of services which are present in the training + set. + schemas: Schemas with information for all services + + Returns: + A dict mapping a metric collection name to a dict containing the values + for various metrics. Each metric collection aggregates the metrics across + a specific set of frames in the dialogues. + """ + # Metrics can be aggregated in various ways, eg over all dialogues, only for + # dialogues containing unseen services or for dialogues corresponding to a + # single service. This aggregation is done through metric_collections, which + # is a dict mapping a collection name to a dict, which maps a metric to a list + # of values for that metric. Each value in this list is the value taken by + # the metric on a frame. + metric_collections = collections.defaultdict(lambda: collections.defaultdict(list)) + + # Ensure the dialogs in dataset_hyp also occur in dataset_ref. + assert set(dataset_hyp.keys()).issubset(set(dataset_ref.keys())) + logging.debug("len(dataset_hyp)=%d, len(dataset_ref)=%d", len(dataset_hyp), len(dataset_ref)) + + # Store metrics for every frame for debugging. 
+ per_frame_metric = {} + + for dial_id, dial_hyp in dataset_hyp.items(): + dial_ref = dataset_ref[dial_id] + + if set(dial_ref["services"]) != set(dial_hyp["services"]): + raise ValueError( + "Set of services present in ground truth and predictions don't match " + "for dialogue with id {}".format(dial_id) + ) + + joint_metrics = [metrics.JOINT_GOAL_ACCURACY, metrics.JOINT_CAT_ACCURACY, metrics.JOINT_NONCAT_ACCURACY] + for turn_id, (turn_ref, turn_hyp) in enumerate(zip(dial_ref["turns"], dial_hyp["turns"])): + metric_collections_per_turn = collections.defaultdict(lambda: collections.defaultdict(lambda: 1.0)) + if turn_ref["speaker"] != turn_hyp["speaker"]: + raise ValueError("Speakers don't match in dialogue with id {}".format(dial_id)) + + # Skip system turns because metrics are only computed for user turns. + if turn_ref["speaker"] != "USER": + continue + + if turn_ref["utterance"] != turn_hyp["utterance"]: + logging.error("Ref utt: %s", turn_ref["utterance"]) + logging.error("Hyp utt: %s", turn_hyp["utterance"]) + raise ValueError("Utterances don't match for dialogue with id {}".format(dial_id)) + + hyp_frames_by_service = {frame["service"]: frame for frame in turn_hyp["frames"]} + + # Calculate metrics for each frame in each user turn. + for frame_ref in turn_ref["frames"]: + service_name = frame_ref["service"] + if service_name not in hyp_frames_by_service: + raise ValueError( + "Frame for service {} not found in dialogue with id {}".format(service_name, dial_id) + ) + service = service_schemas[service_name] + frame_hyp = hyp_frames_by_service[service_name] + + active_intent_acc = metrics.get_active_intent_accuracy(frame_ref, frame_hyp) + slot_tagging_f1_scores = metrics.get_slot_tagging_f1( + frame_ref, frame_hyp, turn_ref["utterance"], service + ) + requested_slots_f1_scores = metrics.get_requested_slots_f1(frame_ref, frame_hyp) + goal_accuracy_dict = metrics.get_average_and_joint_goal_accuracy( + frame_ref, frame_hyp, service, no_fuzzy_match + ) + + frame_metric = { + metrics.ACTIVE_INTENT_ACCURACY: active_intent_acc, + metrics.REQUESTED_SLOTS_F1: requested_slots_f1_scores.f1, + metrics.REQUESTED_SLOTS_PRECISION: requested_slots_f1_scores.precision, + metrics.REQUESTED_SLOTS_RECALL: requested_slots_f1_scores.recall, + } + if slot_tagging_f1_scores is not None: + frame_metric[metrics.SLOT_TAGGING_F1] = slot_tagging_f1_scores.f1 + frame_metric[metrics.SLOT_TAGGING_PRECISION] = slot_tagging_f1_scores.precision + frame_metric[metrics.SLOT_TAGGING_RECALL] = slot_tagging_f1_scores.recall + frame_metric.update(goal_accuracy_dict) + + frame_id = "{:s}-{:03d}-{:s}".format(dial_id, turn_id, frame_hyp["service"]) + per_frame_metric[frame_id] = frame_metric + # Add the frame-level metric result back to dialogues. + frame_hyp["metrics"] = frame_metric + + # Get the domain name of the service. 
+ domain_name = frame_hyp["service"].split("_")[0] + domain_keys = [ALL_SERVICES, frame_hyp["service"], domain_name] + if frame_hyp["service"] in in_domain_services: + domain_keys.append(SEEN_SERVICES) + + else: + domain_keys.append(UNSEEN_SERVICES) + for domain_key in domain_keys: + for metric_key, metric_value in frame_metric.items(): + if metric_value != metrics.NAN_VAL: + if joint_acc_across_turn and metric_key in joint_metrics: + metric_collections_per_turn[domain_key][metric_key] *= metric_value + else: + metric_collections[domain_key][metric_key].append(metric_value) + if joint_acc_across_turn: + # Conduct multiwoz style evaluation that computes joint goal accuracy + # across all the slot values of all the domains for each turn. + for domain_key in metric_collections_per_turn: + for metric_key, metric_value in metric_collections_per_turn[domain_key].items(): + metric_collections[domain_key][metric_key].append(metric_value) + + all_metric_aggregate = {} + for domain_key, domain_metric_vals in metric_collections.items(): + domain_metric_aggregate = {} + for metric_key, value_list in domain_metric_vals.items(): + if value_list: + # Metrics are macro-averaged across all frames. + domain_metric_aggregate[metric_key] = round(float(np.mean(value_list)) * 100.0, 2) + else: + domain_metric_aggregate[metric_key] = metrics.NAN_VAL + all_metric_aggregate[domain_key] = domain_metric_aggregate + return all_metric_aggregate, per_frame_metric diff --git a/nemo/collections/nlp/data/datasets/sgd_dataset/input_example.py b/nemo/collections/nlp/data/datasets/sgd_dataset/input_example.py new file mode 100644 index 000000000000..a842f296d90e --- /dev/null +++ b/nemo/collections/nlp/data/datasets/sgd_dataset/input_example.py @@ -0,0 +1,393 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# Copyright 2019 The Google Research Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= +""" +This file contains code artifacts adapted from the original implementation: +https://github.com/google-research/google-research/blob/master/schema_guided_dst/baseline/data_utils.py +""" + +from nemo import logging + +__all__ = ['InputExample', 'STR_DONTCARE', 'STATUS_OFF', 'STATUS_ACTIVE', 'STATUS_DONTCARE', 'truncate_seq_pair'] + +STR_DONTCARE = "dontcare" + +# These are used to represent the status of slots (off, active, dontcare) and +# intents (off, active) in dialogue state tracking. +STATUS_OFF = 0 +STATUS_ACTIVE = 1 +STATUS_DONTCARE = 2 + + +class InputExample(object): + """An example for training/inference.""" + + def __init__( + self, + schema_config, + service_schema=None, + example_id="NONE", + example_id_num=[], + is_real_example=False, + tokenizer=None, + ): + """Constructs an InputExample. + + Args: + max_seq_length: The maximum length of the sequence. Sequences longer than + this value will be truncated. 
+ service_schema: A ServiceSchema object wrapping the schema for the service + corresponding to this example. + example_id: Unique identifier for the example, like: 'train-1_00000-00-Restaurants_1' + example_id_num: dialogue_id and turn_id combined and service id combined into a list of ints, + like: [1, 0, 0, 18] + is_real_example: Indicates if an example is real or used for padding in a + minibatch. + tokenizer (Tokenizer): such as NemoBertTokenizer + """ + self.schema_config = schema_config + self.service_schema = service_schema + self.example_id = example_id + self.example_id_num = example_id_num + + self.is_real_example = is_real_example + self._max_seq_length = schema_config["MAX_SEQ_LENGTH"] + self._tokenizer = tokenizer + if self.is_real_example and self._tokenizer is None: + raise ValueError("Must specify tokenizer when input is a real example.") + + self.user_utterance = '' + self.system_utterance = '' + # The id of each subword in the vocabulary for BERT. + self.utterance_ids = [0] * self._max_seq_length + # Denotes the identity of the sequence. Takes values 0 (system utterance) and 1 (user utterance). + self.utterance_segment = [0] * self._max_seq_length + # Mask which takes the value 0 for padded tokens and 1 otherwise. + self.utterance_mask = [0] * self._max_seq_length + # Start and inclusive end character indices in the original utterance + # corresponding to the tokens. This is used to obtain the character indices + # from the predicted subword indices during inference. + # NOTE: A positive value indicates the character indices in the user + # utterance whereas a negative value indicates the character indices in the + # system utterance. The indices are offset by 1 to prevent ambiguity in the + # 0 index, which could be in either the user or system utterance by the + # above convention. Now the 0 index corresponds to padded tokens. + self.start_char_idx = [0] * self._max_seq_length + self.end_char_idx = [0] * self._max_seq_length + + # Number of categorical slots present in the service. + self.num_categorical_slots = 0 + # The status of each categorical slot in the service. + self.categorical_slot_status = [STATUS_OFF] * schema_config["MAX_NUM_CAT_SLOT"] + # Masks out categorical status for padded cat slots + self.cat_slot_status_mask = [0] * len(self.categorical_slot_status) + # Number of values taken by each categorical slot. + self.num_categorical_slot_values = [0] * schema_config["MAX_NUM_CAT_SLOT"] + # The index of the correct value for each categorical slot. + self.categorical_slot_values = [0] * schema_config["MAX_NUM_CAT_SLOT"] + # Masks out categorical slots values for slots not used in the service + self.cat_slot_values_mask = [ + [0] * schema_config["MAX_NUM_VALUE_PER_CAT_SLOT"] for _ in range(schema_config["MAX_NUM_CAT_SLOT"]) + ] + + # Number of non-categorical slots present in the service. + self.num_noncategorical_slots = 0 + # The status of each non-categorical slot in the service. + self.noncategorical_slot_status = [STATUS_OFF] * schema_config["MAX_NUM_NONCAT_SLOT"] + # Masks out non-categorical status for padded cat slots + self.noncat_slot_status_mask = [0] * len(self.noncategorical_slot_status) + # The index of the starting subword corresponding to the slot span for a + # non-categorical slot value. + self.noncategorical_slot_value_start = [0] * schema_config["MAX_NUM_NONCAT_SLOT"] + # The index of the ending (inclusive) subword corresponding to the slot span + # for a non-categorical slot value. 
+ self.noncategorical_slot_value_end = [0] * schema_config["MAX_NUM_NONCAT_SLOT"] + + # Total number of slots present in the service. All slots are included here + # since every slot can be requested. + self.num_slots = 0 + # Takes value 1 if the corresponding slot is requested, 0 otherwise. + self.requested_slot_status = [STATUS_OFF] * ( + schema_config["MAX_NUM_CAT_SLOT"] + schema_config["MAX_NUM_NONCAT_SLOT"] + ) + # Masks out requested slots that are not used for the service + self.requested_slot_mask = [0] * len(self.requested_slot_status) + + # Total number of intents present in the service. + self.num_intents = 0 + # Takes value 1 if the intent is active, 0 otherwise. + self.intent_status = [STATUS_OFF] * schema_config["MAX_NUM_INTENT"] + # Masks out intents that are not used for the service, [1] for none intent + self.intent_status_mask = [1] + [0] * len(self.intent_status) + # Label for active intent in the turn + self.intent_status_labels = 0 + + @property + def readable_summary(self): + """Get a readable dict that summarizes the attributes of an InputExample.""" + seq_length = sum(self.utterance_mask) + utt_toks = self._tokenizer.ids_to_tokens(self.utterance_ids[:seq_length]) + utt_tok_mask_pairs = list(zip(utt_toks, self.utterance_segment[:seq_length])) + active_intents = [ + self.service_schema.get_intent_from_id(idx) + for idx, s in enumerate(self.intent_status) + if s == STATUS_ACTIVE + ] + if len(active_intents) > 1: + raise ValueError("Should not have multiple active intents in a single service.") + active_intent = active_intents[0] if active_intents else "" + slot_values_in_state = {} + for idx, s in enumerate(self.categorical_slot_status): + if s == STATUS_ACTIVE: + value_id = self.categorical_slot_values[idx] + slot_values_in_state[ + self.service_schema.get_categorical_slot_from_id(idx) + ] = self.service_schema.get_categorical_slot_value_from_id(idx, value_id) + elif s == STATUS_DONTCARE: + slot_values_in_state[self.service_schema.get_categorical_slot_from_id(idx)] = STR_DONTCARE + for idx, s in enumerate(self.noncategorical_slot_status): + if s == STATUS_ACTIVE: + slot = self.service_schema.get_non_categorical_slot_from_id(idx) + start_id = self.noncategorical_slot_value_start[idx] + end_id = self.noncategorical_slot_value_end[idx] + # Token list is consisted of the subwords that may start with "##". We + # remove "##" to reconstruct the original value. Note that it's not a + # strict restoration of the original string. It's primarily used for + # debugging. + # ex. ["san", "j", "##ose"] --> "san jose" + readable_value = " ".join(utt_toks[start_id : end_id + 1]).replace(" ##", "") + slot_values_in_state[slot] = readable_value + elif s == STATUS_DONTCARE: + slot = self.service_schema.get_non_categorical_slot_from_id(idx) + slot_values_in_state[slot] = STR_DONTCARE + + summary_dict = { + "utt_tok_mask_pairs": utt_tok_mask_pairs, + "utt_len": seq_length, + "num_categorical_slots": self.num_categorical_slots, + "num_categorical_slot_values": self.num_categorical_slot_values, + "num_noncategorical_slots": self.num_noncategorical_slots, + "service_name": self.service_schema.service_name, + "active_intent": active_intent, + "slot_values_in_state": slot_values_in_state, + } + return summary_dict + + def add_utterance_features( + self, system_tokens, system_inv_alignments, user_tokens, user_inv_alignments, system_utterance, user_utterance + ): + """Add utterance related features input to bert. 
+
+        Note: this method modifies the system tokens and user_tokens in place to
+        make their total length <= the maximum input length for the BERT model.
+
+        Args:
+            system_tokens: a list of strings which represents the system utterance.
+            system_inv_alignments: a list of tuples which denotes the start and end
+                character of the token that a bert token originates from in the original
+                system utterance.
+            user_tokens: a list of strings which represents the user utterance.
+            user_inv_alignments: a list of tuples which denotes the start and end
+                character of the token that a bert token originates from in the original
+                user utterance.
+        """
+        # Make user-system utterance input (in BERT format)
+        # Input sequence length for utterance BERT encoder
+        max_utt_len = self._max_seq_length
+
+        # Modify lengths of sys & usr utterance so that length of total utt
+        # (including cls_token, sep_token, sep_token) is no more than max_utt_len
+        is_too_long = truncate_seq_pair(system_tokens, user_tokens, max_utt_len - 3)
+        if is_too_long:
+            logging.debug(f'Utterance sequence truncated in example id - {self.example_id}.')
+
+        # Construct the tokens, segment mask and valid token mask which will be
+        # input to BERT, using the tokens for system utterance (sequence A) and
+        # user utterance (sequence B).
+        utt_subword = []
+        utt_seg = []
+        utt_mask = []
+        start_char_idx = []
+        end_char_idx = []
+
+        utt_subword.append(self._tokenizer.cls_token)
+        utt_seg.append(0)
+        utt_mask.append(1)
+        start_char_idx.append(0)
+        end_char_idx.append(0)
+
+        for subword_idx, subword in enumerate(system_tokens):
+            utt_subword.append(subword)
+            utt_seg.append(0)
+            utt_mask.append(1)
+            st, en = system_inv_alignments[subword_idx]
+            start_char_idx.append(-(st + 1))
+            end_char_idx.append(-(en + 1))
+
+        utt_subword.append(self._tokenizer.sep_token)
+        utt_seg.append(0)
+        utt_mask.append(1)
+        start_char_idx.append(0)
+        end_char_idx.append(0)
+
+        for subword_idx, subword in enumerate(user_tokens):
+            utt_subword.append(subword)
+            utt_seg.append(1)
+            utt_mask.append(1)
+            st, en = user_inv_alignments[subword_idx]
+            start_char_idx.append(st + 1)
+            end_char_idx.append(en + 1)
+
+        utt_subword.append(self._tokenizer.sep_token)
+        utt_seg.append(1)
+        utt_mask.append(1)
+        start_char_idx.append(0)
+        end_char_idx.append(0)
+
+        utterance_ids = self._tokenizer.tokens_to_ids(utt_subword)
+
+        # Zero-pad up to the BERT input sequence length.
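The sign convention built in the two loops above is easy to misread: user-utterance spans are stored as `char_idx + 1`, system-utterance spans as `-(char_idx + 1)`, so index 0 stays reserved for padding and special tokens. A minimal sketch with hypothetical utterances, decoded the same way the prediction utilities later in this diff decode them:

```python
# Hedged sketch of the character-index convention (utterances are made up).
user_utterance = "book a table in san jose"
st, en = 16, 23                            # inclusive character span of "san jose"
start_idx, end_idx = st + 1, en + 1        # positive values -> user utterance
assert user_utterance[start_idx - 1 : end_idx] == "san jose"

system_utterance = "which city should I search in?"
st, en = 6, 9                              # inclusive character span of "city"
start_idx, end_idx = -(st + 1), -(en + 1)  # negative values -> system utterance
assert system_utterance[-start_idx - 1 : -end_idx] == "city"
```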
+ while len(utterance_ids) < max_utt_len: + utterance_ids.append(0) + utt_seg.append(0) + utt_mask.append(0) + start_char_idx.append(0) + end_char_idx.append(0) + self.utterance_ids = utterance_ids + self.utterance_segment = utt_seg + self.utterance_mask = utt_mask + self.start_char_idx = start_char_idx + self.end_char_idx = end_char_idx + + self.user_utterances = user_utterance + self.system_utterance = system_utterance + + def make_copy_with_utterance_features(self): + """Make a copy of the current example with utterance features.""" + new_example = InputExample( + schema_config=self.schema_config, + service_schema=self.service_schema, + example_id=self.example_id, + example_id_num=self.example_id_num, + is_real_example=self.is_real_example, + tokenizer=self._tokenizer, + ) + new_example.utterance_ids = list(self.utterance_ids) + new_example.utterance_segment = list(self.utterance_segment) + new_example.utterance_mask = list(self.utterance_mask) + new_example.start_char_idx = list(self.start_char_idx) + new_example.end_char_idx = list(self.end_char_idx) + new_example.user_utterance = self.user_utterance + new_example.system_utterance = self.system_utterance + return new_example + + def add_categorical_slots(self, state_update): + """Add features for categorical slots.""" + categorical_slots = self.service_schema.categorical_slots + self.num_categorical_slots = len(categorical_slots) + for slot_idx, slot in enumerate(categorical_slots): + values = state_update.get(slot, []) + # Add categorical slot value features. + slot_values = self.service_schema.get_categorical_slot_values(slot) + self.num_categorical_slot_values[slot_idx] = len(slot_values) + # set slot mask to 1, i.e. the slot exists in the service + self.cat_slot_status_mask[slot_idx] = 1 + # set the number of active slot values for this slots in the service + for slot_value_idx in range(len(self.service_schema._categorical_slot_values[slot])): + self.cat_slot_values_mask[slot_idx][slot_value_idx] = 1 + + if not values: + self.categorical_slot_status[slot_idx] = STATUS_OFF + elif values[0] == STR_DONTCARE: + self.categorical_slot_status[slot_idx] = STATUS_DONTCARE + else: + self.categorical_slot_status[slot_idx] = STATUS_ACTIVE + self.categorical_slot_values[slot_idx] = self.service_schema.get_categorical_slot_value_id( + slot, values[0] + ) + + def add_noncategorical_slots(self, state_update, system_span_boundaries, user_span_boundaries): + """Add features for non-categorical slots.""" + noncategorical_slots = self.service_schema.non_categorical_slots + self.num_noncategorical_slots = len(noncategorical_slots) + for slot_idx, slot in enumerate(noncategorical_slots): + values = state_update.get(slot, []) + self.noncat_slot_status_mask[slot_idx] = 1 + if not values: + self.noncategorical_slot_status[slot_idx] = STATUS_OFF + elif values[0] == STR_DONTCARE: + self.noncategorical_slot_status[slot_idx] = STATUS_DONTCARE + else: + self.noncategorical_slot_status[slot_idx] = STATUS_ACTIVE + # Add indices of the start and end tokens for the first encountered + # value. Spans in user utterance are prioritized over the system + # utterance. If a span is not found, the slot value is ignored. + if slot in user_span_boundaries: + start, end = user_span_boundaries[slot] + elif slot in system_span_boundaries: + start, end = system_span_boundaries[slot] + else: + # A span may not be found because the value was cropped out or because + # the value was mentioned earlier in the dialogue. 
Since this model + # only makes use of the last two utterances to predict state updates, + # it will fail in such cases. + logging.debug( + f'"Slot values {str(values)} not found in user or system utterance in example with id - {self.example_id}.' + ) + + continue + self.noncategorical_slot_value_start[slot_idx] = start + self.noncategorical_slot_value_end[slot_idx] = end + + def add_requested_slots(self, frame): + all_slots = self.service_schema.slots + self.num_slots = len(all_slots) + for slot_idx, slot in enumerate(all_slots): + self.requested_slot_mask[slot_idx] = 1 + if slot in frame["state"]["requested_slots"]: + self.requested_slot_status[slot_idx] = STATUS_ACTIVE + + def add_intents(self, frame): + all_intents = self.service_schema.intents + self.num_intents = len(all_intents) + for intent_idx, intent in enumerate(all_intents): + if intent == frame["state"]["active_intent"]: + self.intent_status[intent_idx] = STATUS_ACTIVE + # adding +1 to take none intent into account + # supports only 1 active intent in the turn + self.intent_status_labels = intent_idx + 1 + self.intent_status_mask[intent_idx + 1] = 1 + + +# Modified from run_classifier._truncate_seq_pair in the public bert model repo. +# https://github.com/google-research/bert/blob/master/run_classifier.py. +def truncate_seq_pair(tokens_a, tokens_b, max_length): + """Truncate a seq pair in place so that their total length <= max_length.""" + is_too_long = False + # This is a simple heuristic which will always truncate the longer sequence + # one token at a time. This makes more sense than truncating an equal percent + # of tokens from each, since if one sequence is very short then each token + # that's truncated likely contains more information than a longer sequence. + while True: + total_length = len(tokens_a) + len(tokens_b) + if total_length <= max_length: + break + is_too_long = True + if len(tokens_a) > len(tokens_b): + tokens_a.pop() + else: + tokens_b.pop() + return is_too_long diff --git a/nemo/collections/nlp/data/datasets/sgd_dataset/metrics.py b/nemo/collections/nlp/data/datasets/sgd_dataset/metrics.py new file mode 100644 index 000000000000..ae2d2a23b8f3 --- /dev/null +++ b/nemo/collections/nlp/data/datasets/sgd_dataset/metrics.py @@ -0,0 +1,284 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# Copyright 2019 The Google Research Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +"""Evaluation metrics for Schema-guided dialogue. + +This library provides functions for calculating the evaluation metrics for a +single dialogue. The following metrics are defined: + +(1) Active intent accuracy: The fraction of user turns for which the active + intent has been correctly predicted. +(2) Slot tagging F1: The macro-averaged F1 score for tagging slot values for + non-categorical slots. 
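For reference, `truncate_seq_pair` above always removes tokens from the longer of the two lists, one at a time, and reports whether any truncation happened. A small self-contained check of that behaviour (the token lists are made up and the loop is condensed, but the logic is the same):

```python
def truncate_seq_pair(tokens_a, tokens_b, max_length):
    # Condensed restatement of the heuristic above: trim the longer list first.
    is_too_long = False
    while len(tokens_a) + len(tokens_b) > max_length:
        is_too_long = True
        if len(tokens_a) > len(tokens_b):
            tokens_a.pop()
        else:
            tokens_b.pop()
    return is_too_long


sys_toks = ["the", "table", "is", "booked", "for", "two"]
usr_toks = ["thanks", "!"]
assert truncate_seq_pair(sys_toks, usr_toks, 5) is True
assert sys_toks == ["the", "table", "is"] and usr_toks == ["thanks", "!"]
```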
This metric is optional to report in the final paper
+    if participants decide not to use slot tagging.
+(3) Requested slots F1: The macro-averaged F1 score for requested slots over the
+    turns. For a turn, if there are no requested slots in both the ground truth
+    and the prediction, that turn is skipped. The reported number is the average
+    F1 score for all un-skipped user turns. This metric is optional to report in
+    the final paper.
+(4) Average goal accuracy: For each turn, participants must predict a single
+    value for each slot present in the dialogue state. Only the slots which have
+    a non-empty assignment in the ground truth dialogue state are considered.
+    This is the average accuracy of predicting the value of a slot correctly. A
+    fuzzy matching based score is used for non-categorical slots.
+(5) Joint goal accuracy: This is the average accuracy of predicting all slot
+    assignments for a turn correctly. A fuzzy matching based score is used for
+    non-categorical slots. This is the primary evaluation metric used for ranking
+    submissions. More details to follow with the evaluation script.
+
+This file contains code artifacts adapted from the original implementation:
+https://github.com/google-research/google-research/blob/master/schema_guided_dst/metrics.py
+"""
+
+import collections
+
+import numpy as np
+from rapidfuzz import fuzz
+
+F1Scores = collections.namedtuple("F1Scores", ["f1", "precision", "recall"])
+
+# Evaluation and other relevant metrics for DSTC8/SGD Schema-guided DST.
+# (1) Active intent accuracy.
+ACTIVE_INTENT_ACCURACY = "active_intent_accuracy"
+# (2) Slot tagging F1.
+SLOT_TAGGING_F1 = "slot_tagging_f1"
+SLOT_TAGGING_PRECISION = "slot_tagging_precision"
+SLOT_TAGGING_RECALL = "slot_tagging_recall"
+# (3) Requested slots F1.
+REQUESTED_SLOTS_F1 = "requested_slots_f1"
+REQUESTED_SLOTS_PRECISION = "requested_slots_precision"
+REQUESTED_SLOTS_RECALL = "requested_slots_recall"
+# (4) Average goal accuracy.
+AVERAGE_GOAL_ACCURACY = "average_goal_accuracy"
+AVERAGE_CAT_ACCURACY = "average_cat_accuracy"
+AVERAGE_NONCAT_ACCURACY = "average_noncat_accuracy"
+# (5) Joint goal accuracy.
+JOINT_GOAL_ACCURACY = "joint_goal_accuracy"
+JOINT_CAT_ACCURACY = "joint_cat_accuracy"
+JOINT_NONCAT_ACCURACY = "joint_noncat_accuracy"
+
+NAN_VAL = "NA"
+
+
+def compute_f1(list_ref, list_hyp):
+    """Compute F1 score from reference (ground truth) list and hypothesis list.
+
+    Args:
+        list_ref: List of true elements.
+        list_hyp: List of positive (retrieved) elements.
+
+    Returns:
+        A F1Scores object containing F1, precision, and recall scores.
+    """
+
+    ref = collections.Counter(list_ref)
+    hyp = collections.Counter(list_hyp)
+    true = sum(ref.values())
+    positive = sum(hyp.values())
+    true_positive = sum((ref & hyp).values())
+    precision = float(true_positive) / positive if positive else 1.0
+    recall = float(true_positive) / true if true else 1.0
+    if precision + recall > 0.0:
+        f1 = 2.0 * precision * recall / (precision + recall)
+    else:  # The F1-score is defined to be 0 if both precision and recall are 0.
+        f1 = 0.0
+
+    return F1Scores(f1=f1, precision=precision, recall=recall)
+
+
+def fuzzy_string_match(str_ref, str_hyp):
+    """Returns fuzzy string similarity score in range [0.0, 1.0]."""
+
+    # The higher the score, the higher the similarity between the two strings.
+    return fuzz.token_sort_ratio(str_ref, str_hyp) / 100.0
+
+
+def noncat_slot_value_match(str_ref_list, str_hyp, no_fuzzy_match):
+    """Calculate non-categorical slots correctness.
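A short usage sketch of `compute_f1` and `fuzzy_string_match` above (slot names are made up; the module import path is the one used by `evaluate.py` earlier in this diff, and exact fuzzy scores can vary with the rapidfuzz version):

```python
from rapidfuzz import fuzz

import nemo.collections.nlp.data.datasets.sgd_dataset.metrics as metrics

# One shared element out of two reference / two predicted slots:
# precision = recall = F1 = 0.5.
scores = metrics.compute_f1(["date", "time"], ["date", "price"])
print(scores.f1, scores.precision, scores.recall)  # 0.5 0.5 0.5

# token_sort_ratio is order-insensitive, so re-ordered words still match fully.
print(fuzz.token_sort_ratio("san jose", "jose san") / 100.0)  # 1.0
```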
+ + Args: + str_ref_list: a list of reference strings. + str_hyp: the hypothesis string. + use_fuzzy_match: whether to use fuzzy string matching. + + Returns: + score: The highest fuzzy string match score of the references and hypotheis. + """ + score = 0.0 + for str_ref in str_ref_list: + if no_fuzzy_match: + match_score = float(str_ref == str_hyp) + else: + match_score = fuzzy_string_match(str_ref, str_hyp) + score = max(score, match_score) + return score + + +def compare_slot_values(slot_values_ref, slot_values_hyp, service, no_fuzzy_match): + """Compare and get correctness of goal state's slot_values. + + Args: + slot_values_ref: goal state slot_values from reference (ground truth). + slot_values_hyp: goal state slot_values from hypothesis (prediction). + service: a service data structure in the schema. We use it to obtain the + list of slots in the service and infer whether a slot is categorical. + use_fuzzy_match: whether to use fuzzy string matching for non-categorical + slot values + + Returns: + (list_cor, slot_active, slot_cat) + list_cor: list of corectness scores, each corresponding to one slot in the + service. The score is a float either 0.0 or 1.0 for categorical slot, + and in range [0.0, 1.0] for non-categorical slot. + slot_active: list indicating whether the element in list_cor corresponds to + an active ground-truth slot. + slot_cat: list indicating whether the element in list_cor corresponds to a + categorical slot. + """ + list_cor = [] + slot_active = [] + slot_cat = [] + + for slot in service["slots"]: + slot_name = slot["name"] + slot_cat.append(slot["is_categorical"]) + + if slot_name in slot_values_ref: # REF=active + slot_active.append(True) + if slot_name in slot_values_hyp: # HYP=active, apply matching + value_ref_list = slot_values_ref[slot_name] + value_hyp = slot_values_hyp[slot_name][0] + if slot["is_categorical"]: + cor = float(value_ref_list[0] == value_hyp) + else: + cor = noncat_slot_value_match(value_ref_list, value_hyp, no_fuzzy_match) + + list_cor.append(cor) + else: # HYP=off + list_cor.append(0.0) + else: # REF=off + slot_active.append(False) + if slot_name in slot_values_hyp: # HYP=active + list_cor.append(0.0) + else: # HYP=off + list_cor.append(1.0) + + assert len(list_cor) == len(service["slots"]) + assert len(slot_active) == len(service["slots"]) + assert len(slot_cat) == len(service["slots"]) + return list_cor, slot_active, slot_cat + + +def get_active_intent_accuracy(frame_ref, frame_hyp): + """Get active intent accuracy of a frame. + + Args: + frame_ref: single semantic frame from reference (ground truth) file. + frame_hyp: single semantic frame from hypothesis (prediction) file. + + Returns: + 1.0 if the intent prediction is correct, otherwise 0.0. + """ + return float(frame_ref["state"]["active_intent"] == frame_hyp["state"]["active_intent"]) + + +def get_slot_tagging_f1(frame_ref, frame_hyp, utt, service): + """Get slot tagging (non-categorical slots only) F1 scores of a frame. + + Args: + frame_ref: single semantic frame from reference (ground truth) file. + frame_hyp: single semantic frame from hypothesis (prediction) file. + utt: user utterance. Slot tagging annotations are the character positions in + the utterance. + service: a service data structure in the schema. We use it to infer whether + a slot is non-categorical. + + Returns: + A F1Scores object containing F1, precision, and recall scores. 
+ """ + + list_noncat_slots = [s["name"] for s in service["slots"] if not s["is_categorical"]] + if "slots" not in frame_hyp: + return None + else: + list_ref = [ + (s["slot"], utt[s["start"] : s["exclusive_end"]]) + for s in frame_ref["slots"] + if s["slot"] in list_noncat_slots + ] + list_hyp = [ + (s["slot"], utt[s["start"] : s["exclusive_end"]]) + for s in frame_hyp["slots"] + if s["slot"] in list_noncat_slots + ] + return compute_f1(list_ref, list_hyp) + + +def get_requested_slots_f1(frame_ref, frame_hyp): + """Get requested slots F1 scores of a frame. + + Args: + frame_ref: single semantic frame from reference (ground truth) file. + frame_hyp: single semantic frame from hypothesis (prediction) file. + + Returns: + A F1Scores object containing F1, precision, and recall scores. + """ + return compute_f1(frame_ref["state"]["requested_slots"], frame_hyp["state"]["requested_slots"]) + + +def get_average_and_joint_goal_accuracy(frame_ref, frame_hyp, service, no_fuzzy_match): + """Get average and joint goal accuracies of a frame. + + Args: + frame_ref: single semantic frame from reference (ground truth) file. + frame_hyp: single semantic frame from hypothesis (prediction) file. + service: a service data structure in the schema. We use it to obtain the + list of slots in the service and infer whether a slot is categorical. + use_fuzzy_match: whether to use fuzzy string matching for comparing + non-categorical slot values. + + Returns: + goal_acc: a dict whose values are average / joint + all-goal / categorical-goal / non-categorical-goal accuracies. + """ + goal_acc = {} + + list_acc, slot_active, slot_cat = compare_slot_values( + frame_ref["state"]["slot_values"], frame_hyp["state"]["slot_values"], service, no_fuzzy_match + ) + + # (4) Average goal accuracy. + active_acc = [acc for acc, active in zip(list_acc, slot_active) if active] + goal_acc[AVERAGE_GOAL_ACCURACY] = np.mean(active_acc) if active_acc else NAN_VAL + # (4-a) categorical. + active_cat_acc = [acc for acc, active, cat in zip(list_acc, slot_active, slot_cat) if active and cat] + goal_acc[AVERAGE_CAT_ACCURACY] = np.mean(active_cat_acc) if active_cat_acc else NAN_VAL + # (4-b) non-categorical. + active_noncat_acc = [acc for acc, active, cat in zip(list_acc, slot_active, slot_cat) if active and not cat] + goal_acc[AVERAGE_NONCAT_ACCURACY] = np.mean(active_noncat_acc) if active_noncat_acc else NAN_VAL + + # (5) Joint goal accuracy. + goal_acc[JOINT_GOAL_ACCURACY] = np.prod(list_acc) if list_acc else NAN_VAL + # (5-a) categorical. + cat_acc = [acc for acc, cat in zip(list_acc, slot_cat) if cat] + goal_acc[JOINT_CAT_ACCURACY] = np.prod(cat_acc) if cat_acc else NAN_VAL + # (5-b) non-categorical. + noncat_acc = [acc for acc, cat in zip(list_acc, slot_cat) if not cat] + goal_acc[JOINT_NONCAT_ACCURACY] = np.prod(noncat_acc) if noncat_acc else NAN_VAL + + return goal_acc diff --git a/nemo/collections/nlp/data/datasets/sgd_dataset/prediction_utils.py b/nemo/collections/nlp/data/datasets/sgd_dataset/prediction_utils.py new file mode 100644 index 000000000000..42d655a82545 --- /dev/null +++ b/nemo/collections/nlp/data/datasets/sgd_dataset/prediction_utils.py @@ -0,0 +1,342 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# Copyright 2019 The Google Research Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +""" +Prediction and evaluation-related utility functions. +This file contains code artifacts adapted from the original implementation: +https://github.com/google-research/google-research/blob/master/schema_guided_dst/baseline/pred_utils.py +""" + +import collections +import json +import os + +from nemo import logging +from nemo.collections.nlp.data.datasets.sgd_dataset.input_example import STATUS_ACTIVE, STATUS_DONTCARE, STR_DONTCARE + +REQ_SLOT_THRESHOLD = 0.5 + +__all__ = ['get_predicted_dialog_baseline', 'write_predictions_to_file'] + + +def get_predicted_dialog_ret_sys_act(dialog, all_predictions, schemas, eval_debug, in_domain_services): + """Update labels in a dialogue based on model predictions. + Args: + dialog: A json object containing dialogue whose labels are to be updated. + all_predictions: A dict mapping prediction name to the predicted value. See + SchemaGuidedDST class for the contents of this dict. + schemas: A Schema object wrapping all the schemas for the dataset. + Returns: + A json object containing the dialogue with labels predicted by the model. + """ + # This approach retreives slot values from the history of system actions if slot is active but it can not find it in user utterance + # Overwrite the labels in the turn with the predictions from the model. + dialog_id = dialog["dialogue_id"] + # The slot values tracked for each service. + all_slot_values = collections.defaultdict(dict) + sys_prev_slots = collections.defaultdict(dict) + sys_rets = {} + + for turn_idx, turn in enumerate(dialog["turns"]): + if turn["speaker"] == "SYSTEM": + for frame in turn["frames"]: + for action in frame["actions"]: + if action["slot"] and len(action["values"]) > 0: + sys_prev_slots[frame["service"]][action["slot"]] = action["values"][0] + elif turn["speaker"] == "USER": + user_utterance = turn["utterance"] + system_utterance = dialog["turns"][turn_idx - 1]["utterance"] if turn_idx else "" + turn_id = "{:02d}".format(turn_idx) + for frame in turn["frames"]: + cat_slot_status_acc = 0 + cat_slot_status_num = 0 + noncat_slot_status_num = 0 + noncat_slot_status_acc = 0 + + predictions = all_predictions[(dialog_id, turn_id, frame["service"])] + slot_values = all_slot_values[frame["service"]] + service_schema = schemas.get_service_schema(frame["service"]) + + # Remove the slot spans and state if present. + true_slots = frame.pop("slots", None) + true_state = frame.pop("state", None) + + # The baseline model doesn't predict slot spans. Only state predictions are added. + state = {} + + # Add prediction for active intent. Offset is subtracted to account for NONE intent. + active_intent_id = predictions["intent_status"] + state["active_intent"] = ( + service_schema.get_intent_from_id(active_intent_id - 1) if active_intent_id else "NONE" + ) + + # Add prediction for requested slots. 
+ requested_slots = [] + for slot_idx, slot in enumerate(service_schema.slots): + if predictions["req_slot_status"][slot_idx] > REQ_SLOT_THRESHOLD: + requested_slots.append(slot) + state["requested_slots"] = requested_slots + + # Add prediction for user goal (slot values). + # Categorical slots. + categorical_slots_dict = {} + non_categorical_slots_dict = {} + + predictions["cat_slot_status_p"] = predictions["cat_slot_status_p"].cpu().numpy() + predictions["cat_slot_status"] = predictions["cat_slot_status"].cpu().numpy() + predictions["cat_slot_value"] = predictions["cat_slot_value"].cpu().numpy() + predictions["cat_slot_value_p"] = predictions["cat_slot_value_p"].cpu().numpy() + + predictions["noncat_slot_status_p"] = predictions["noncat_slot_status_p"].cpu().numpy() + predictions["noncat_slot_status"] = predictions["noncat_slot_status"].cpu().numpy() + predictions["noncat_slot_p"] = predictions["noncat_slot_p"].cpu().numpy() + + predictions["noncat_alignment_start"] = predictions["noncat_alignment_start"].cpu().numpy() + predictions["noncat_alignment_end"] = predictions["noncat_alignment_end"].cpu().numpy() + predictions["cat_slot_status_GT"] = predictions["cat_slot_status_GT"].cpu().numpy() + predictions["noncat_slot_status_GT"] = predictions["noncat_slot_status_GT"].cpu().numpy() + + for slot_idx, slot in enumerate(service_schema.categorical_slots): + # debugging info + cat_slot_status_num += 1 + categorical_slots_dict[slot] = ( + predictions["cat_slot_status_GT"][slot_idx], + predictions["cat_slot_status"][slot_idx], + predictions["cat_slot_status_p"][slot_idx], + service_schema.get_categorical_slot_values(slot)[predictions["cat_slot_value"][slot_idx]], + predictions["cat_slot_value_p"][slot_idx], + ) + + if predictions["cat_slot_status_GT"][slot_idx] == predictions["cat_slot_status"][slot_idx]: + cat_slot_status_acc += 1 + + slot_status = predictions["cat_slot_status"][slot_idx] + if slot_status == STATUS_DONTCARE: + slot_values[slot] = STR_DONTCARE + elif slot_status == STATUS_ACTIVE: + if ( + predictions["cat_slot_status_p"][slot_idx] + predictions["cat_slot_value_p"][slot_idx] + ) / 2 > 0.9: + value_idx = predictions["cat_slot_value"][slot_idx] + slot_values[slot] = service_schema.get_categorical_slot_values(slot)[value_idx] + else: + if slot in sys_prev_slots[frame["service"]]: + # debugging info + sys_rets[slot] = sys_prev_slots[frame["service"]][slot] + slot_values[slot] = sys_prev_slots[frame["service"]][slot] + else: + value_idx = predictions["cat_slot_value"][slot_idx] + slot_values[slot] = service_schema.get_categorical_slot_values(slot)[value_idx] + + for slot_idx, slot in enumerate(service_schema.non_categorical_slots): + tok_start_idx = predictions["noncat_slot_start"][slot_idx] + tok_end_idx = predictions["noncat_slot_end"][slot_idx] + ch_start_idx = predictions["noncat_alignment_start"][tok_start_idx] + ch_end_idx = predictions["noncat_alignment_end"][tok_end_idx] + + # debugging nfo + noncat_slot_status_num += 1 + + non_categorical_slots_dict[slot] = ( + predictions["noncat_slot_status_GT"][slot_idx], + predictions["noncat_slot_status"][slot_idx], + predictions["noncat_slot_status_p"][slot_idx], + (ch_start_idx, ch_end_idx), + user_utterance[ch_start_idx - 1 : ch_end_idx] + if (ch_start_idx > 0 and ch_end_idx > 0) + else system_utterance[-ch_start_idx - 1 : -ch_end_idx], + predictions["noncat_slot_p"][slot_idx], + ) + if predictions["noncat_slot_status_GT"][slot_idx] == predictions["noncat_slot_status"][slot_idx]: + noncat_slot_status_acc += 1 + + slot_status = 
predictions["noncat_slot_status"][slot_idx] + if slot_status == STATUS_DONTCARE: + slot_values[slot] = STR_DONTCARE + elif slot_status == STATUS_ACTIVE: + tok_start_idx = predictions["noncat_slot_start"][slot_idx] + tok_end_idx = predictions["noncat_slot_end"][slot_idx] + ch_start_idx = predictions["noncat_alignment_start"][tok_start_idx] + ch_end_idx = predictions["noncat_alignment_end"][tok_end_idx] + + if ch_start_idx > 0 and ch_end_idx > 0: + # Add span from the user utterance. + slot_values[slot] = user_utterance[ch_start_idx - 1 : ch_end_idx] + else: + if slot in sys_prev_slots[frame["service"]]: + # debugging info + sys_rets[slot] = sys_prev_slots[frame["service"]][slot] + slot_values[slot] = sys_prev_slots[frame["service"]][slot] + + if eval_debug and frame["service"] in in_domain_services: + logging.debug("-----------------------------------New Frame------------------------------") + logging.debug(f'SYS : {system_utterance}') + logging.debug(f'USER: {user_utterance}') + + logging.debug("\n") + logging.debug(f"PRED CAT: {categorical_slots_dict}") + logging.debug(f"PRED NON-CAT: {non_categorical_slots_dict}") + + logging.debug("\n") + logging.debug(f"SLOTS - LABEL: {true_slots}") + logging.debug(f"STATE - LABEL: {true_state['slot_values']}") + logging.debug(f"STATE - PRED : {slot_values}") + + logging.debug("\n") + logging.debug(f"SYS PREV SLOT: {sys_prev_slots}") + logging.debug(f"SYS RETS: {sys_rets}") + cat_slot_status_acc = ( + "NAN" if cat_slot_status_num == 0 else cat_slot_status_acc / cat_slot_status_num + ) + logging.debug(f"CAT STATUS ACC: {cat_slot_status_acc}") + noncat_slot_status_acc = ( + "NAN" if noncat_slot_status_num == 0 else noncat_slot_status_acc / noncat_slot_status_num + ) + logging.debug(f"NONCAT STATUS ACC: {noncat_slot_status_acc}") + + # Create a new dict to avoid overwriting the state in previous turns + # because of use of same objects. + state["slot_values"] = {s: [v] for s, v in slot_values.items()} + frame["state"] = state + + return dialog + + +def get_predicted_dialog_baseline(dialog, all_predictions, schemas): + """Update labels in a dialogue based on model predictions. + Args: + dialog: A json object containing dialogue whose labels are to be updated. + all_predictions: A dict mapping prediction name to the predicted value. See + SchemaGuidedDST class for the contents of this dict. + schemas: A Schema object wrapping all the schemas for the dataset. + Returns: + A json object containing the dialogue with labels predicted by the model. + """ + # Overwrite the labels in the turn with the predictions from the model. For + # test set, these labels are missing from the data and hence they are added. + dialog_id = dialog["dialogue_id"] + # The slot values tracked for each service. + all_slot_values = collections.defaultdict(dict) + for turn_idx, turn in enumerate(dialog["turns"]): + if turn["speaker"] == "USER": + user_utterance = turn["utterance"] + system_utterance = dialog["turns"][turn_idx - 1]["utterance"] if turn_idx else "" + turn_id = "{:02d}".format(turn_idx) + for frame in turn["frames"]: + predictions = all_predictions[(dialog_id, turn_id, frame["service"])] + slot_values = all_slot_values[frame["service"]] + service_schema = schemas.get_service_schema(frame["service"]) + # Remove the slot spans and state if present. + frame.pop("slots", None) + frame.pop("state", None) + + # The baseline model doesn't predict slot spans. Only state predictions + # are added. + state = {} + + # Add prediction for active intent. 
Offset is subtracted to account for + # NONE intent. + active_intent_id = predictions["intent_status"] + state["active_intent"] = ( + service_schema.get_intent_from_id(active_intent_id - 1) if active_intent_id else "NONE" + ) + + # Add prediction for requested slots. + requested_slots = [] + for slot_idx, slot in enumerate(service_schema.slots): + if predictions["req_slot_status"][slot_idx] > REQ_SLOT_THRESHOLD: + requested_slots.append(slot) + state["requested_slots"] = requested_slots + + # Add prediction for user goal (slot values). + # Categorical slots. + for slot_idx, slot in enumerate(service_schema.categorical_slots): + slot_status = predictions["cat_slot_status"][slot_idx] + if slot_status == STATUS_DONTCARE: + slot_values[slot] = STR_DONTCARE + elif slot_status == STATUS_ACTIVE: + value_idx = predictions["cat_slot_value"][slot_idx] + slot_values[slot] = service_schema.get_categorical_slot_values(slot)[value_idx] + # Non-categorical slots. + for slot_idx, slot in enumerate(service_schema.non_categorical_slots): + slot_status = predictions["noncat_slot_status"][slot_idx] + if slot_status == STATUS_DONTCARE: + slot_values[slot] = STR_DONTCARE + elif slot_status == STATUS_ACTIVE: + tok_start_idx = predictions["noncat_slot_start"][slot_idx] + tok_end_idx = predictions["noncat_slot_end"][slot_idx] + ch_start_idx = predictions["noncat_alignment_start"][tok_start_idx] + ch_end_idx = predictions["noncat_alignment_end"][tok_end_idx] + if ch_start_idx < 0 and ch_end_idx < 0: + # Add span from the system utterance. + slot_values[slot] = system_utterance[-ch_start_idx - 1 : -ch_end_idx] + elif ch_start_idx > 0 and ch_end_idx > 0: + # Add span from the user utterance. + slot_values[slot] = user_utterance[ch_start_idx - 1 : ch_end_idx] + # Create a new dict to avoid overwriting the state in previous turns + # because of use of same objects. + state["slot_values"] = {s: [v] for s, v in slot_values.items()} + frame["state"] = state + return dialog + + +def write_predictions_to_file( + predictions, input_json_files, output_dir, schemas, state_tracker, eval_debug, in_domain_services +): + """Write the predicted dialogues as json files. + + Args: + predictions: An iterator containing model predictions. This is the output of + the predict method in the estimator. + input_json_files: A list of json paths containing the dialogues to run + inference on. + schemas: Schemas to all services in the dst dataset (train, dev and test splits). + output_dir: The directory where output json files will be created. + """ + logging.info(f"Writing predictions to {output_dir} started.") + + # Index all predictions. + all_predictions = {} + for idx, prediction in enumerate(predictions): + if not prediction["is_real_example"]: + continue + eval_dataset, dialog_id, turn_id, service_name = prediction['example_id'].split('-') + all_predictions[(dialog_id, turn_id, service_name)] = prediction + logging.info(f'Predictions for {idx} examples in {eval_dataset} dataset are getting processed.') + + # Read each input file and write its predictions. 
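The prediction index built above relies on the example id layout documented earlier in this diff (`split-dialogueId-turnId-serviceName`); because dialogue and service names use underscores rather than dashes, splitting on `-` is unambiguous. A minimal sketch with a hypothetical id:

```python
# Hypothetical example id in the documented format.
example_id = "dev-1_00000-00-Restaurants_1"
eval_dataset, dialog_id, turn_id, service_name = example_id.split("-")
print(eval_dataset, dialog_id, turn_id, service_name)
# dev 1_00000 00 Restaurants_1
```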
+ for input_file_path in input_json_files: + with open(input_file_path) as f: + dialogs = json.load(f) + logging.debug(f'{input_file_path} file is loaded') + pred_dialogs = [] + for d in dialogs: + if state_tracker == 'baseline': + pred_dialog = get_predicted_dialog_baseline(d, all_predictions, schemas) + elif state_tracker == 'ret_sys_act': + pred_dialog = get_predicted_dialog_ret_sys_act( + d, all_predictions, schemas, eval_debug, in_domain_services + ) + else: + raise ValueError(f"tracker_mode {state_tracker} is not defined.") + pred_dialogs.append(pred_dialog) + f.close() + input_file_name = os.path.basename(input_file_path) + output_file_path = os.path.join(output_dir, input_file_name) + with open(output_file_path, "w") as f: + json.dump(pred_dialogs, f, indent=2, separators=(",", ": "), sort_keys=True) + f.close() diff --git a/nemo/collections/nlp/data/datasets/sgd_dataset/schema.py b/nemo/collections/nlp/data/datasets/sgd_dataset/schema.py new file mode 100644 index 000000000000..1462c6329892 --- /dev/null +++ b/nemo/collections/nlp/data/datasets/sgd_dataset/schema.py @@ -0,0 +1,182 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# Copyright 2019 The Google Research Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +""" +Wrappers for schemas of different services. +This file contains code artifacts adapted from the original implementation: +https://github.com/google-research/google-research/blob/master/schema_guided_dst/schema.py +https://github.com/google-research/google-research/blob/master/schema_guided_dst +""" + +import json + +from nemo import logging + +__all__ = ['ServiceSchema', 'Schema'] + + +class ServiceSchema(object): + """A wrapper for schema for a service.""" + + def __init__(self, schema_json, service_id=None): + self._service_name = schema_json["service_name"] + self._description = schema_json["description"] + self._schema_json = schema_json + self._service_id = service_id + + # Construct the vocabulary for intents, slots, categorical slots, + # non-categorical slots and categorical slot values. These vocabs are used + # for generating indices for their embedding matrix. 
+ self._intents = sorted(i["name"] for i in schema_json["intents"]) + self._slots = sorted(s["name"] for s in schema_json["slots"]) + self._categorical_slots = sorted( + s["name"] for s in schema_json["slots"] if s["is_categorical"] and s["name"] in self.state_slots + ) + self._non_categorical_slots = sorted( + s["name"] for s in schema_json["slots"] if not s["is_categorical"] and s["name"] in self.state_slots + ) + slot_schemas = {s["name"]: s for s in schema_json["slots"]} + categorical_slot_values = {} + categorical_slot_value_ids = {} + for slot in self._categorical_slots: + slot_schema = slot_schemas[slot] + values = sorted(slot_schema["possible_values"]) + categorical_slot_values[slot] = values + value_ids = {value: idx for idx, value in enumerate(values)} + categorical_slot_value_ids[slot] = value_ids + self._categorical_slot_values = categorical_slot_values + self._categorical_slot_value_ids = categorical_slot_value_ids + + @property + def schema_json(self): + return self._schema_json + + @property + def state_slots(self): + """Set of slots which are permitted to be in the dialogue state.""" + state_slots = set() + for intent in self._schema_json["intents"]: + state_slots.update(intent["required_slots"]) + state_slots.update(intent["optional_slots"]) + return state_slots + + @property + def service_name(self): + return self._service_name + + @property + def service_id(self): + return self._service_id + + @property + def description(self): + return self._description + + @property + def slots(self): + return self._slots + + @property + def intents(self): + return self._intents + + @property + def categorical_slots(self): + return self._categorical_slots + + @property + def non_categorical_slots(self): + return self._non_categorical_slots + + def get_categorical_slot_values(self, slot): + return self._categorical_slot_values[slot] + + def get_slot_from_id(self, slot_id): + return self._slots[slot_id] + + def get_intent_from_id(self, intent_id): + return self._intents[intent_id] + + def get_categorical_slot_from_id(self, slot_id): + return self._categorical_slots[slot_id] + + def get_non_categorical_slot_from_id(self, slot_id): + return self._non_categorical_slots[slot_id] + + def get_categorical_slot_value_from_id(self, slot_id, value_id): + slot = self.categorical_slots[slot_id] + return self._categorical_slot_values[slot][value_id] + + def get_categorical_slot_value_id(self, slot, value): + return self._categorical_slot_value_ids[slot][value] + + +class Schema(object): + """Wrapper for schemas for all services in a dataset.""" + + def __init__(self, schema_json_paths): + """ + TODO fix: + schema_json_paths: list of .json path to schema files of a single str with path to the json file. + """ + # Load the schema from the json file. 
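`ServiceSchema` above turns each categorical slot's `possible_values` into a sorted list plus a value-to-index map, so model outputs can be decoded back to surface values. A toy sketch of that round trip (the slot values are made up):

```python
# Toy rebuild of the categorical value vocabulary pattern used by ServiceSchema.
possible_values = ["moderate", "cheap", "expensive"]
values = sorted(possible_values)                 # ['cheap', 'expensive', 'moderate']
value_ids = {value: idx for idx, value in enumerate(values)}

predicted_value_id = value_ids["moderate"]       # what a model prediction indexes into
assert values[predicted_value_id] == "moderate"  # decoding recovers the original value
```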
+ if isinstance(schema_json_paths, str): + with open(schema_json_paths, "r") as f: + all_schemas = json.load(f) + f.close() + else: + # load multiple schemas from the list of the json files + all_schemas = [] + completed_services = [] + for schema_json_path in schema_json_paths: + with open(schema_json_path, "r") as f: + schemas = json.load(f) + f.close() + logging.debug("Num of services in %s: %s", schema_json_path, len(schemas)) + + for service in schemas: + if service['service_name'] not in completed_services: + completed_services.append(service['service_name']) + all_schemas.append(service) + + self._services = sorted(schema["service_name"] for schema in all_schemas) + self._services_vocab = {v: k for k, v in enumerate(self._services)} + self._services_id_to_vocab = {v: k for k, v in self._services_vocab.items()} + service_schemas = {} + for schema in all_schemas: + service = schema["service_name"] + service_schemas[service] = ServiceSchema(schema, service_id=self.get_service_id(service)) + + self._service_schemas = service_schemas + self._schemas = all_schemas + + def get_service_id(self, service): + return self._services_vocab[service] + + def get_service_from_id(self, service_id): + return self._services[service_id] + + def get_service_schema(self, service): + return self._service_schemas[service] + + @property + def services(self): + return self._services + + def save_to_file(self, file_path): + with open(file_path, "w") as f: + json.dump(self._schemas, f, indent=2) diff --git a/nemo/collections/nlp/data/datasets/sgd_dataset/schema_embedding_dataset.py b/nemo/collections/nlp/data/datasets/sgd_dataset/schema_embedding_dataset.py new file mode 100644 index 000000000000..9bb5ff65148b --- /dev/null +++ b/nemo/collections/nlp/data/datasets/sgd_dataset/schema_embedding_dataset.py @@ -0,0 +1,357 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# Copyright 2019 The Google Research Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +""" +Extract BERT embeddings for slots, values, intents in schema. + +This file contains code artifacts adapted from the original implementation: +https://github.com/google-research/google-research/blob/master/schema_guided_dst/baseline/extract_schema_embedding.py +""" + +import collections +import random +import re + +import numpy as np +import torch +from torch.utils.data import Dataset + +from nemo import logging +from nemo.collections.nlp.data.datasets.sgd_dataset.input_example import truncate_seq_pair + +# Separator to separate the two sentences in BERT's input sequence. +_NL_SEPARATOR = "|||" + +__all__ = ['SchemaEmbeddingDataset'] + + +class SchemaEmbeddingDataset(Dataset): + def __init__(self, schema_config, tokenizer, schemas): + """Generate the embeddings for a schema's elements. 
+ + Args: + tokenizer (tokenizer): such as NemoBertTokenizer + max_seq_length: Sequence length used for BERT model + schemas: Schemas for all services in the datasets + """ + self._tokenizer = tokenizer + self.schema_config = schema_config + self.schemas = schemas + + input_features = self._get_input_features() + + self.features = collections.defaultdict(list) + + for feature in input_features: + self.features["input_ids"].append(feature.input_ids) + self.features["input_mask"].append(feature.input_mask) + self.features["input_type_ids"].append(feature.input_type_ids) + self.features["embedding_tensor_name"].append(feature.embedding_tensor_name) + self.features["service_id"].append(feature.service_id) + self.features["intent_or_slot_id"].append(feature.intent_or_slot_id) + self.features["value_id"].append(feature.value_id) + + def __len__(self): + return len(self.features['input_ids']) + + def __getitem__(self, idx): + return ( + np.array(self.features['input_ids'][idx]), + np.array(self.features['input_mask'][idx], dtype=np.long), + np.array(self.features['input_type_ids'][idx]), + ) + + def _create_feature(self, line, embedding_tensor_name, service_id, intent_or_slot_id, value_id=-1): + """Create a single InputFeatures instance.""" + seq_length = self.schema_config["MAX_SEQ_LENGTH"] + # line = tokenization.convert_to_unicode(input_line) + line = line.strip() + text_a = None + text_b = None + m = re.match(r"^(.*) \|\|\| (.*)$", line) + if m is None: + text_a = line + else: + text_a = m.group(1) + text_b = m.group(2) + + tokens_a = self._tokenizer.text_to_tokens(text_a) + tokens_b = None + if text_b: + tokens_b = self._tokenizer.text_to_tokens(text_b) + + if tokens_b: + # Modifies `tokens_a` and `tokens_b` in place so that the total + # length is less than the specified length. + # Account for [CLS], [SEP], [SEP] with "- 3" + truncate_seq_pair(tokens_a, tokens_b, seq_length - 3) + else: + # Account for [CLS] and [SEP] with "- 2" + if len(tokens_a) > seq_length - 2: + tokens_a = tokens_a[0 : (seq_length - 2)] + + # The convention in BERT is: + # (a) For sequence pairs: + # tokens: [CLS] is this jack ##son ##ville ? [SEP] no it is not . [SEP] + # type_ids: 0 0 0 0 0 0 0 0 1 1 1 1 1 1 + # (b) For single sequences: + # tokens: [CLS] the dog is hairy . [SEP] + # type_ids: 0 0 0 0 0 0 0 + # + # Where "type_ids" are used to indicate whether this is the first + # sequence or the second sequence. The embedding vectors for `type=0` and + # `type=1` were learned during pre-training and are added to the wordpiece + # embedding vector (and position vector). This is not *strictly* necessary + # since the [SEP] token unambiguously separates the sequences, but it + # makes it easier for the model to learn the concept of sequences. + # + # For classification tasks, the first vector (corresponding to [CLS]) is + # used as as the "sentence vector". Note that this only makes sense + # because the entire model is fine-tuned. + tokens = [] + input_type_ids = [] + tokens.append(self._tokenizer.cls_token) + input_type_ids.append(0) + for token in tokens_a: + tokens.append(token) + input_type_ids.append(0) + tokens.append(self._tokenizer.sep_token) + input_type_ids.append(0) + + if tokens_b: + for token in tokens_b: + tokens.append(token) + input_type_ids.append(1) + tokens.append(self._tokenizer.sep_token) + input_type_ids.append(1) + + input_ids = self._tokenizer.tokens_to_ids(tokens) + + # The mask has 1 for real tokens and 0 for padding tokens. Only real + # tokens are attended to. 
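`_create_feature` above accepts either a single sentence or two sentences joined by the `"|||"` separator; the regex splits them into BERT's sequence A and sequence B. A small hedged sketch with made-up schema text:

```python
import re

# Made-up "service description ||| slot name + description" pair, split the
# same way _create_feature does before tokenization.
line = "Reserve a table at a restaurant ||| city City where the restaurant is located"
m = re.match(r"^(.*) \|\|\| (.*)$", line)
text_a, text_b = (m.group(1), m.group(2)) if m else (line, None)
print(text_a)  # Reserve a table at a restaurant
print(text_b)  # city City where the restaurant is located
```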
+ input_mask = [1] * len(input_ids) + + # Zero-pad up to the sequence length. + while len(input_ids) < seq_length: + input_ids.append(0) + input_mask.append(0) + input_type_ids.append(0) + assert len(input_ids) == seq_length + assert len(input_mask) == seq_length + assert len(input_type_ids) == seq_length + + return InputFeatures( + input_ids=input_ids, + input_mask=input_mask, + input_type_ids=input_type_ids, + embedding_tensor_name=embedding_tensor_name, + service_id=service_id, + intent_or_slot_id=intent_or_slot_id, + value_id=value_id, + ) + + def _get_intents_input_features(self, service_schema): + """Create features for BERT inference for all intents of a service. + + We use "[service description] ||| [intent name] [intent description]" as an + intent's full description. + + Args: + service_schema: A ServiceSchema object containing the schema for the + corresponding service. + + Returns: + A list of InputFeatures containing features to be given as input to the + BERT model. + """ + service_des = service_schema.description + + features = [] + intent_descriptions = {i["name"]: i["description"] for i in service_schema.schema_json["intents"]} + for intent_id, intent in enumerate(service_schema.intents): + nl_seq = " ".join([service_des, _NL_SEPARATOR, intent, intent_descriptions[intent]]) + features.append(self._create_feature(nl_seq, "intent_emb", service_schema.service_id, intent_id)) + return features + + def _get_req_slots_input_features(self, service_schema): + """Create features for BERT inference for all requested slots of a service. + + We use "[service description] ||| [slot name] [slot description]" as a + slot's full description. + + Args: + service_schema: A ServiceSchema object containing the schema for the + corresponding service. + + Returns: + A list of InputFeatures containing features to be given as input to the + BERT model. + """ + service_des = service_schema.description + + slot_descriptions = {s["name"]: s["description"] for s in service_schema.schema_json["slots"]} + features = [] + for slot_id, slot in enumerate(service_schema.slots): + nl_seq = " ".join([service_des, _NL_SEPARATOR, slot, slot_descriptions[slot]]) + features.append(self._create_feature(nl_seq, "req_slot_emb", service_schema.service_id, slot_id)) + return features + + def _get_goal_slots_and_values_input_features(self, service_schema): + """Get BERT input features for all goal slots and categorical values. + + We use "[service description] ||| [slot name] [slot description]" as a + slot's full description. + We use ""[slot name] [slot description] ||| [value name]" as a categorical + slot value's full description. + + Args: + service_schema: A ServiceSchema object containing the schema for the + corresponding service. + + Returns: + A list of InputFeatures containing features to be given as input to the + BERT model. 
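+        Non-categorical slots contribute one feature each; categorical slots
+        additionally contribute one feature per allowed value.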
+ """ + service_des = service_schema.description + + features = [] + slot_descriptions = {s["name"]: s["description"] for s in service_schema.schema_json["slots"]} + + for slot_id, slot in enumerate(service_schema.non_categorical_slots): + nl_seq = " ".join([service_des, _NL_SEPARATOR, slot, slot_descriptions[slot]]) + features.append(self._create_feature(nl_seq, "noncat_slot_emb", service_schema.service_id, slot_id)) + + for slot_id, slot in enumerate(service_schema.categorical_slots): + nl_seq = " ".join([service_des, _NL_SEPARATOR, slot, slot_descriptions[slot]]) + features.append(self._create_feature(nl_seq, "cat_slot_emb", service_schema.service_id, slot_id)) + for value_id, value in enumerate(service_schema.get_categorical_slot_values(slot)): + nl_seq = " ".join([slot, slot_descriptions[slot], _NL_SEPARATOR, value]) + features.append( + self._create_feature(nl_seq, "cat_slot_value_emb", service_schema.service_id, slot_id, value_id) + ) + return features + + def _get_input_features(self): + """Get the input function to compute schema element embeddings. + + Args: + schemas: A wrapper for all service schemas in the dataset to be embedded. + + Returns: + The input_fn to be passed to the estimator. + """ + # Obtain all the features. + features = [] + for service in self.schemas.services: + service_schema = self.schemas.get_service_schema(service) + features.extend(self._get_intents_input_features(service_schema)) + features.extend(self._get_req_slots_input_features(service_schema)) + features.extend(self._get_goal_slots_and_values_input_features(service_schema)) + + return features + + def _populate_schema_embeddings(self, schema_embeddings, hidden_states, mode): + """ + Populate all schema embeddings with BERT embeddings. + """ + completed_services = set() + batch_size, seq_len, hidden_size = hidden_states[0].shape + + for idx in range(len(self)): + service_id = self.features['service_id'][idx] + service = self.schemas.get_service_from_id(service_id) + + if service not in completed_services: + logging.debug(f"Generating embeddings for service {service}.") + completed_services.add(service) + tensor_name = self.features["embedding_tensor_name"][idx] + emb_mat = schema_embeddings[service_id][tensor_name] + + if mode == 'random': + # randomly initialize schema embeddings + random_token = random.randint(0, seq_len - 1) + embedding = [round(float(x), 6) for x in hidden_states[0][idx, random_token, :].flat] + elif mode == 'last_layer_average': + # Obtain the encoding of the [CLS] token. + embedding = [round(float(x), 6) for x in np.mean(hidden_states[0][idx, :], 0).flat] + elif mode == 'baseline': + # Obtain the encoding of the [CLS] token. 
+ embedding = [round(float(x), 6) for x in hidden_states[0][idx, 0, :].flat] + else: + raise ValueError(f'Mode {mode} for generation schema embeddings is not supported') + intent_or_slot_id = self.features['intent_or_slot_id'][idx] + value_id = self.features['value_id'][idx] + + if tensor_name == "cat_slot_value_emb": + emb_mat[intent_or_slot_id, value_id] = embedding + else: + emb_mat[intent_or_slot_id] = embedding + + def save_embeddings(self, bert_hidden_states, output_file, mode): + """Generate schema element embeddings and save it as a numpy file.""" + schema_embeddings = [] + max_num_intent = self.schema_config["MAX_NUM_INTENT"] + max_num_cat_slot = self.schema_config["MAX_NUM_CAT_SLOT"] + max_num_noncat_slot = self.schema_config["MAX_NUM_NONCAT_SLOT"] + max_num_slot = max_num_cat_slot + max_num_noncat_slot + max_num_value = self.schema_config["MAX_NUM_VALUE_PER_CAT_SLOT"] + embedding_dim = self.schema_config["EMBEDDING_DIMENSION"] + + for _ in self.schemas.services: + schema_embeddings.append( + { + "intent_emb": np.zeros([max_num_intent, embedding_dim]), + "req_slot_emb": np.zeros([max_num_slot, embedding_dim]), + "cat_slot_emb": np.zeros([max_num_cat_slot, embedding_dim]), + "noncat_slot_emb": np.zeros([max_num_noncat_slot, embedding_dim]), + "cat_slot_value_emb": np.zeros([max_num_cat_slot, max_num_value, embedding_dim]), + } + ) + + # Populate the embeddings based on bert inference results and save them. + self._populate_schema_embeddings(schema_embeddings, bert_hidden_states, mode) + + master_device = not torch.distributed.is_initialized() or torch.distributed.get_rank() == 0 + if master_device: + with open(output_file, "wb") as f_s: + np.save(f_s, schema_embeddings) + logging.info(f"The schema embeddings saved at {output_file}") + f_s.close() + + +class InputFeatures(object): + """A single set of features for BERT inference.""" + + def __init__( + self, input_ids, input_mask, input_type_ids, embedding_tensor_name, service_id, intent_or_slot_id, value_id + ): + # The ids in the vocabulary for input tokens. + self.input_ids = input_ids + # A boolean mask indicating which tokens in the input_ids are valid. + self.input_mask = input_mask + # Denotes the sequence each input token belongs to. + self.input_type_ids = input_type_ids + # The name of the embedding tensor corresponding to this example. + self.embedding_tensor_name = embedding_tensor_name + # The id of the service corresponding to this example. + self.service_id = service_id + # The id of the intent (for intent embeddings) or slot (for slot or slot + # value embeddings) corresponding to this example. + self.intent_or_slot_id = intent_or_slot_id + # The id of the value corresponding to this example. Only set if slot value + # embeddings are being calculated. + self.value_id = value_id diff --git a/nemo/collections/nlp/data/datasets/sgd_dataset/schema_processor.py b/nemo/collections/nlp/data/datasets/sgd_dataset/schema_processor.py new file mode 100644 index 000000000000..cddb7f79f6a6 --- /dev/null +++ b/nemo/collections/nlp/data/datasets/sgd_dataset/schema_processor.py @@ -0,0 +1,161 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# Copyright 2019 The Google Research Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +""" +This file contains code artifacts adapted from the original implementation: +https://github.com/google-research/google-research/blob/master/schema_guided_dst +""" + +import collections +import os + +import numpy as np +import torch + +from nemo import logging +from nemo.collections.nlp.data.datasets.sgd_dataset import schema +from nemo.collections.nlp.data.datasets.sgd_dataset.schema_embedding_dataset import SchemaEmbeddingDataset +from nemo.collections.nlp.nm.data_layers.bert_inference_datalayer import BertInferDataLayer +from nemo.collections.nlp.utils.data_utils import concatenate + +__all__ = ['SchemaPreprocessor'] + + +class SchemaPreprocessor: + """ + Convert the raw data to the standard format supported by + StateTrackingSGDData. + + Args: + data_dir (str) - Directory for the downloaded DSTC8/SGD data, which contains + the dialogue files and schema files of all datasets (eg train, dev) + dialogues_example_dir (str) - Directory where preprocessed DSTC8/SGD dialogues are stored + schema_embedding_dir (str) - Directory where .npy file for embedding of + entities (slots, values, intents) in the dataset_split's + schema are stored. + task_name (str) - The name of the task to train + vocab_file (str) - The path to BERT vocab file + do_lower_case - (bool) - Whether to lower case the input text. + Should be True for uncased models and False for cased models. + max_seq_length (int) - The maximum total input sequence length after + WordPiece tokenization. Sequences longer than this will be + truncated, and sequences shorter than this will be padded." + tokenizer - tokenizer + bert_model - pretrained BERT model + dataset_split (str) - Dataset split for training / prediction (train/dev/test) + overwrite_dial_file (bool) - Whether to generate a new file saving + the dialogue examples overwrite_schema_emb_file, + bert_ckpt_dir (str) - Directory containing pre-trained BERT checkpoint + nf - NeuralModuleFactory + mode(str): Schema embeddings initialization mode, baseline is ['CLS'] token embeddings + from the last BERT layer + """ + + def __init__( + self, + data_dir, + schema_embedding_dir, + schema_config, + tokenizer, + bert_model, + overwrite_schema_emb_files, + bert_ckpt_dir, + nf, + datasets=['train', 'test', 'dev'], + mode='baseline', + is_trainable=False, + ): + + # Dimension of the embedding for intents, slots and categorical slot values in + # Maximum allowed number of categorical trackable slots for a service. + self.schema_config = schema_config.copy() + + self.is_trainable = is_trainable + self.datasets = datasets + + for dataset_split in ['train', 'test', 'dev']: + if dataset_split not in self.datasets: + logging.warning( + 'WARNING: %s set was not included and won\'t be processed. 
Services from this dataset split ' + + 'won\'t be supported', + dataset_split, + ) + os.makedirs(schema_embedding_dir, exist_ok=True) + + tokenizer_type = type(tokenizer.tokenizer).__name__ + vocab_size = getattr(tokenizer, "vocab_size", 0) + self.schema_embedding_file = os.path.join( + schema_embedding_dir, + "{}_{}_{}_{}_pretrained_schema_embedding.npy".format( + '_'.join(self.datasets), mode, tokenizer_type, vocab_size + ), + ) + all_schema_json_paths = [] + for dataset_split in self.datasets: + all_schema_json_paths.append(os.path.join(data_dir, dataset_split, "schema.json")) + self.schemas = schema.Schema(all_schema_json_paths) + + if not os.path.exists(self.schema_embedding_file) or overwrite_schema_emb_files: + # Generate the schema embeddings if needed or specified + logging.info(f"Start generating the schema embeddings.") + dataset_params = { + "schema_config": schema_config, + "tokenizer": tokenizer, + "schemas": self.schemas, + } + emb_datalayer = BertInferDataLayer( + dataset_type=SchemaEmbeddingDataset, dataset_params=dataset_params, batch_size=1, shuffle=False, + ) + + input_ids, input_mask, input_type_ids = emb_datalayer() + + hidden_states = bert_model(input_ids=input_ids, token_type_ids=input_type_ids, attention_mask=input_mask) + evaluated_tensors = nf.infer(tensors=[hidden_states], checkpoint_dir=bert_ckpt_dir) + + master_device = not torch.distributed.is_initialized() or torch.distributed.get_rank() == 0 + if master_device: + hidden_states = [concatenate(tensors) for tensors in evaluated_tensors] + emb_datalayer.dataset.save_embeddings(hidden_states, self.schema_embedding_file, mode) + logging.info(f"Finish generating the schema embeddings.") + + # wait until the master process writes to the schema embedding file + if torch.distributed.is_initialized(): + torch.distributed.barrier() + + with open(self.schema_embedding_file, "rb") as f: + self.schema_embeddings = np.load(f, allow_pickle=True) + f.close() + + def get_schema_embeddings(self): + # Convert from list of dict to dict of list + schema_data_dict = collections.defaultdict(list) + for service in self.schema_embeddings: + schema_data_dict["cat_slot_emb"].append(service["cat_slot_emb"]) + schema_data_dict["cat_slot_value_emb"].append(service["cat_slot_value_emb"]) + schema_data_dict["noncat_slot_emb"].append(service["noncat_slot_emb"]) + schema_data_dict["req_slot_emb"].append(service["req_slot_emb"]) + schema_data_dict["intent_emb"].append(service["intent_emb"]) + return schema_data_dict + + def _get_schema_embedding_file_name(self): + return self.schema_embedding_file + + def get_service_names_to_id_dict(self): + return self.schemas._services_vocab + + def get_ids_to_service_names_dict(self): + return self.schemas._services_id_to_vocab diff --git a/nemo/collections/nlp/data/datasets/sgd_dataset/sgd_dataset.py b/nemo/collections/nlp/data/datasets/sgd_dataset/sgd_dataset.py new file mode 100644 index 000000000000..bbd2c239c996 --- /dev/null +++ b/nemo/collections/nlp/data/datasets/sgd_dataset/sgd_dataset.py @@ -0,0 +1,67 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# Copyright 2019 The Google Research Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= +""" +This file contains code artifacts adapted from the original implementation: +https://github.com/google-research/google-research/blob/master/schema_guided_dst +""" +import numpy as np +from torch.utils.data import Dataset + +__all__ = ['SGDDataset'] + + +class SGDDataset(Dataset): + """ + Processes SGD dataset + Args: + dataset_split (str): train/dev/test + dialogues_processor (obj): Data generator for SGD dialogues + """ + + def __init__(self, dataset_split, dialogues_processor): + self.features = dialogues_processor.get_dialog_examples(dataset_split) + + def __len__(self): + return len(self.features) + + def __getitem__(self, idx): + ex = self.features[idx] + service_id = ex.service_schema.service_id + + return ( + np.array(ex.example_id_num), + np.array(service_id), + np.array(ex.is_real_example, dtype=int), + np.array(ex.utterance_ids), + np.array(ex.utterance_segment), + np.array(ex.utterance_mask, dtype=np.long), + np.array(ex.categorical_slot_status), + np.array(ex.cat_slot_status_mask), + np.array(ex.categorical_slot_values), + np.array(ex.cat_slot_values_mask), + np.array(ex.noncategorical_slot_status), + np.array(ex.noncat_slot_status_mask), + np.array(ex.noncategorical_slot_value_start), + np.array(ex.noncategorical_slot_value_end), + np.array(ex.start_char_idx), # noncat_alignment_start + np.array(ex.end_char_idx), # noncat_alignment_end + np.array(ex.num_slots), # num_requested_slots + np.array(ex.requested_slot_status, dtype=np.float32), + np.array(ex.requested_slot_mask), + np.array(ex.intent_status_mask), + np.array(ex.intent_status_labels), + ) diff --git a/nemo/collections/nlp/data/datasets/text_classification/__init__.py b/nemo/collections/nlp/data/datasets/text_classification/__init__.py new file mode 100644 index 000000000000..8a6baeb34da6 --- /dev/null +++ b/nemo/collections/nlp/data/datasets/text_classification/__init__.py @@ -0,0 +1,18 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================= + +from nemo.collections.nlp.data.datasets.text_classification.text_classification_dataset import * +from nemo.collections.nlp.data.datasets.text_classification.text_classification_descriptor import * diff --git a/nemo/collections/nlp/data/datasets/text_classification/text_classification_dataset.py b/nemo/collections/nlp/data/datasets/text_classification/text_classification_dataset.py new file mode 100644 index 000000000000..21af59b5ec60 --- /dev/null +++ b/nemo/collections/nlp/data/datasets/text_classification/text_classification_dataset.py @@ -0,0 +1,239 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +""" +Utility functions for Token Classification NLP tasks +Some parts of this code were adapted from the HuggingFace library at +https://github.com/huggingface/pytorch-pretrained-BERT +""" + +import os +import random + +import h5py +import numpy as np +from torch.utils.data import Dataset + +from nemo import logging +from nemo.collections.nlp.data.datasets.datasets_utils.data_preprocessing import get_stats +from nemo.collections.nlp.utils.callback_utils import list2str + +__all__ = ['BertTextClassificationDataset'] + + +class BertTextClassificationDataset(Dataset): + """A dataset class that converts from raw data to + a dataset that can be used by DataLayerNM. + + Args: + input_file (str): file to sequence + label. + the first line is header (sentence [tab] label) + each line should be [sentence][tab][label] + max_seq_length (int): max sequence length minus 2 for [CLS] and [SEP] + tokenizer (Tokenizer): such as NemoBertTokenizer + num_samples (int): number of samples you want to use for the dataset. + If -1, use all dataset. Useful for testing. 
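+        shuffle (bool): whether to shuffle the data.
+        use_cache (bool): whether to cache the computed features in an HDF5 file
+            next to input_file and reuse it on later runs.
+
+    A minimal usage sketch (the file path and pretrained model name below are
+    hypothetical):
+
+        tokenizer = NemoBertTokenizer(pretrained_model='bert-base-uncased')
+        dataset = BertTextClassificationDataset(
+            input_file='data/train.tsv', max_seq_length=128, tokenizer=tokenizer
+        )
+        input_ids, segment_ids, input_mask, label = dataset[0]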
+ """ + + def __init__( + self, input_file, max_seq_length, tokenizer, num_samples=-1, shuffle=False, use_cache=False, + ): + self.input_file = input_file + self.max_seq_length = max_seq_length + self.tokenizer = tokenizer + self.num_samples = num_samples + self.use_cache = use_cache + self.shuffle = shuffle + self.vocab_size = self.tokenizer.tokenizer.vocab_size + + if use_cache: + data_dir, filename = os.path.split(input_file) + vocab_size = getattr(tokenizer, "vocab_size", 0) + tokenizer_type = type(tokenizer.tokenizer).__name__ + cached_features_file = os.path.join( + data_dir, + "cached_{}_{}_{}_{}".format( + filename[:-4], tokenizer_type, str(max_seq_length), str(vocab_size), '.hdf5' + ), + ) + + if use_cache and os.path.exists(cached_features_file): + self.load_cached_features(cached_features_file) + else: + with open(input_file, "r") as f: + sent_labels, all_sent_subtokens = [], [] + sent_lengths = [] + too_long_count = 0 + + lines = f.readlines()[1:] + logging.info(f'{input_file}: {len(lines)}') + + if shuffle or num_samples > -1: + random.seed(0) + random.shuffle(lines) + if num_samples > 0: + lines = lines[:num_samples] + + for index, line in enumerate(lines): + if index % 20000 == 0: + logging.debug(f"Processing line {index}/{len(lines)}") + + line_splited = line.strip().split() + sent_label = int(line_splited[-1]) + sent_labels.append(sent_label) + sent_words = line_splited[:-1] + sent_subtokens = [tokenizer.cls_token] + + for word in sent_words: + word_tokens = tokenizer.text_to_tokens(word) + sent_subtokens.extend(word_tokens) + + sent_subtokens.append(tokenizer.sep_token) + + all_sent_subtokens.append(sent_subtokens) + sent_lengths.append(len(sent_subtokens)) + get_stats(sent_lengths) + + for i in range(len(all_sent_subtokens)): + if len(all_sent_subtokens[i]) > max_seq_length: + shorten_sent = all_sent_subtokens[i][-max_seq_length + 1 :] + all_sent_subtokens[i] = [tokenizer.cls_token] + shorten_sent + too_long_count += 1 + + logging.info( + f'{too_long_count} out of {len(sent_lengths)} \ + sentences with more than {max_seq_length} subtokens.' + ) + + self.convert_sequences_to_features(all_sent_subtokens, sent_labels, tokenizer, max_seq_length) + + if self.use_cache: + self.cache_features(cached_features_file, self.features) + + # update self.features to use features from hdf5 + self.load_cached_features(cached_features_file) + + def __len__(self): + if self.use_cache: + return len(self.features[0]) + + else: + return len(self.features) + + def __getitem__(self, idx): + if self.use_cache: + return (self.features[0][idx], self.features[1][idx], self.features[2][idx], self.features[3][idx]) + + else: + feature = self.features[idx] + + return ( + np.array(feature.input_ids), + np.array(feature.segment_ids), + np.array(feature.input_mask, dtype=np.long), + feature.sent_label, + ) + + def convert_sequences_to_features(self, all_sent_subtokens, sent_labels, tokenizer, max_seq_length): + """Loads a data file into a list of `InputBatch`s. + """ + + self.features = [] + for sent_id in range(len(all_sent_subtokens)): + sent_subtokens = all_sent_subtokens[sent_id] + sent_label = sent_labels[sent_id] + + input_ids = [tokenizer.tokens_to_ids(t) for t in sent_subtokens] + + # The mask has 1 for real tokens and 0 for padding tokens. + # Only real tokens are attended to. + input_mask = [1] * len(input_ids) + + # Zero-pad up to the sequence length. 
+ while len(input_ids) < max_seq_length: + input_ids.append(0) + input_mask.append(0) + segment_ids = [0] * max_seq_length + + assert len(input_ids) == max_seq_length + assert len(input_mask) == max_seq_length + + if sent_id < 5: + logging.info("*** Example ***") + logging.info("example_index: %s" % sent_id) + logging.info("subtokens: %s" % " ".join(sent_subtokens)) + logging.info("sent_label: %s" % sent_label) + logging.info("input_ids: %s" % list2str(input_ids)) + logging.info("input_mask: %s" % list2str(input_mask)) + + self.features.append( + InputFeatures( + sent_id=sent_id, + sent_label=sent_label, + input_ids=input_ids, + input_mask=input_mask, + segment_ids=segment_ids, + ) + ) + + def cache_features(self, cached_features_file, features): + len_features = len(features) + input_ids_array = np.zeros((len_features, self.max_seq_length)) + segment_ids_array = np.zeros((len_features, self.max_seq_length)) + input_mask_array = np.zeros((len_features, self.max_seq_length)) + sent_labels_array = np.zeros((len_features,)) + + for idx in range(len_features): + input_ids_array[idx] = features[idx].input_ids + segment_ids_array[idx] = features[idx].segment_ids + input_mask_array[idx] = features[idx].input_mask + sent_labels_array[idx] = features[idx].sent_label + + f = h5py.File(cached_features_file, mode='w') + f.create_dataset('input_ids', data=input_ids_array) + f.create_dataset('segment_ids', data=segment_ids_array) + f.create_dataset('input_mask', data=input_mask_array) + f.create_dataset('sent_labels', data=sent_labels_array) + f.close() + + def load_cached_features(self, cached_features_file): + f = h5py.File(cached_features_file, 'r') + keys = ['input_ids', 'segment_ids', 'input_mask', 'sent_labels'] + self.features = [np.asarray(f[key], dtype=np.long) for key in keys] + f.close() + logging.info(f'features restored from {cached_features_file}') + + if self.shuffle: + np.random.seed(0) + idx = np.arange(len(self)) + np.random.shuffle(idx) # shuffle idx in place + shuffled_features = [arr[idx] for arr in self.features] + self.features = shuffled_features + + if self.num_samples > 0: + truncated_features = [arr[0 : self.num_samples] for arr in self.features] + self.features = truncated_features + + +class InputFeatures(object): + """A single set of features of data.""" + + def __init__(self, sent_id, sent_label, input_ids, input_mask, segment_ids): + self.sent_id = sent_id + self.sent_label = sent_label + self.input_ids = input_ids + self.input_mask = input_mask + self.segment_ids = segment_ids diff --git a/nemo/collections/nlp/data/datasets/text_classification/text_classification_descriptor.py b/nemo/collections/nlp/data/datasets/text_classification/text_classification_descriptor.py new file mode 100644 index 000000000000..dbcbd03aaad8 --- /dev/null +++ b/nemo/collections/nlp/data/datasets/text_classification/text_classification_descriptor.py @@ -0,0 +1,56 @@ +from nemo import logging +from nemo.collections.nlp.data.datasets.datasets_utils import ( + fill_class_weights, + get_freq_weights, + get_label_stats, + if_exist, +) + +__all__ = ['TextClassificationDataDesc'] + + +class TextClassificationDataDesc: + def __init__(self, data_dir, modes=['train', 'test', 'dev']): + self.data_dir = data_dir + + max_label_id = 0 + for mode in modes: + if not if_exist(self.data_dir, [f'{mode}.tsv']): + logging.info(f'Stats calculation for {mode} mode is skipped as {mode}.tsv was not found.') + continue + + input_file = f'{self.data_dir}/{mode}.tsv' + with open(input_file, 'r') as f: + input_lines = 
f.readlines()[1:] # Skipping headers at index 0 + + try: + int(input_lines[0].strip().split()[-1]) + except ValueError: + logging.warning(f'No numerical labels found for {mode}.tsv.') + raise + + queries, raw_sentences = [], [] + for input_line in input_lines: + parts = input_line.strip().split() + label = int(parts[-1]) + raw_sentences.append(label) + queries.append(' '.join(parts[:-1])) + + infold = input_file[: input_file.rfind('/')] + + logging.info(f'Three most popular classes in {mode} dataset') + total_sents, sent_label_freq, max_id = get_label_stats( + raw_sentences, infold + f'/{mode}_sentence_stats.tsv' + ) + max_label_id = max(max_label_id, max_id) + + if mode == 'train': + class_weights_dict = get_freq_weights(sent_label_freq) + logging.info(f'Class Weights: {class_weights_dict}') + + logging.info(f'Total Sentences: {total_sents}') + logging.info(f'Sentence class frequencies - {sent_label_freq}') + + self.class_weights = fill_class_weights(class_weights_dict, max_label_id) + + self.num_labels = max_label_id + 1 diff --git a/nemo/collections/nlp/data/datasets/text_classification_dataset.py b/nemo/collections/nlp/data/datasets/text_classification_dataset.py deleted file mode 100644 index 11340ffa4da5..000000000000 --- a/nemo/collections/nlp/data/datasets/text_classification_dataset.py +++ /dev/null @@ -1,248 +0,0 @@ -# Copyright 2018 The Google AI Language Team Authors and -# The HuggingFace Inc. team. -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Utility functions for Token Classification NLP tasks -Some parts of this code were adapted from the HuggingFace library at -https://github.com/huggingface/pytorch-pretrained-BERT -""" - -import random - -import numpy as np -from torch.utils.data import Dataset - -from nemo import logging -from nemo.collections.nlp.data.datasets.datasets_utils import ( - get_intent_labels, - get_label_stats, - get_stats, - process_imdb, - process_jarvis_datasets, - process_nlu, - process_sst_2, - process_thucnews, -) -from nemo.collections.nlp.utils.callback_utils import list2str -from nemo.collections.nlp.utils.common_nlp_utils import calc_class_weights, if_exist - -__all__ = ['BertTextClassificationDataset'] - - -class BertTextClassificationDataset(Dataset): - """A dataset class that converts from raw data to - a dataset that can be used by DataLayerNM. - - Args: - input_file (str): file to sequence + label. - the first line is header (sentence [tab] label) - each line should be [sentence][tab][label] - max_seq_length (int): max sequence length minus 2 for [CLS] and [SEP] - tokenizer (Tokenizer): such as BertTokenizer - num_samples (int): number of samples you want to use for the dataset. - If -1, use all dataset. Useful for testing. - shuffle (bool): whether to shuffle your data. 
- """ - - def __init__(self, input_file, max_seq_length, tokenizer, num_samples=-1, shuffle=True): - with open(input_file, "r") as f: - sent_labels, all_sent_subtokens = [], [] - sent_lengths = [] - too_long_count = 0 - - lines = f.readlines()[1:] - logging.info(f'{input_file}: {len(lines)}') - - if shuffle or num_samples > -1: - random.seed(0) - random.shuffle(lines) - if num_samples > 0: - lines = lines[:num_samples] - - for index, line in enumerate(lines): - if index % 20000 == 0: - logging.debug(f"Processing line {index}/{len(lines)}") - - sent_label = int(line.split()[-1]) - sent_labels.append(sent_label) - sent_words = line.strip().split()[:-1] - sent_subtokens = ['[CLS]'] - - for word in sent_words: - word_tokens = tokenizer.tokenize(word) - sent_subtokens.extend(word_tokens) - - sent_subtokens.append('[SEP]') - - all_sent_subtokens.append(sent_subtokens) - sent_lengths.append(len(sent_subtokens)) - - get_stats(sent_lengths) - self.max_seq_length = min(max_seq_length, max(sent_lengths)) - - for i in range(len(all_sent_subtokens)): - if len(all_sent_subtokens[i]) > self.max_seq_length: - shorten_sent = all_sent_subtokens[i][-self.max_seq_length + 1 :] - all_sent_subtokens[i] = ['[CLS]'] + shorten_sent - too_long_count += 1 - - logging.info( - f'{too_long_count} out of {len(sent_lengths)} \ - sentencess with more than {max_seq_length} subtokens.' - ) - - self.convert_sequences_to_features(all_sent_subtokens, sent_labels, tokenizer, self.max_seq_length) - - self.tokenizer = tokenizer - self.vocab_size = self.tokenizer.vocab_size - - def __len__(self): - return len(self.features) - - def __getitem__(self, idx): - - feature = self.features[idx] - - return ( - np.array(feature.input_ids), - np.array(feature.segment_ids), - np.array(feature.input_mask, dtype=np.long), - feature.sent_label, - ) - - def convert_sequences_to_features(self, all_sent_subtokens, sent_labels, tokenizer, max_seq_length): - """Loads a data file into a list of `InputBatch`s. - """ - - self.features = [] - for sent_id in range(len(all_sent_subtokens)): - sent_subtokens = all_sent_subtokens[sent_id] - sent_label = sent_labels[sent_id] - - input_ids = [tokenizer._convert_token_to_id(t) for t in sent_subtokens] - - # The mask has 1 for real tokens and 0 for padding tokens. - # Only real tokens are attended to. - input_mask = [1] * len(input_ids) - - # Zero-pad up to the sequence length. 
- while len(input_ids) < max_seq_length: - input_ids.append(0) - input_mask.append(0) - segment_ids = [0] * max_seq_length - - assert len(input_ids) == max_seq_length - assert len(input_mask) == max_seq_length - - if sent_id == 0: - logging.info("*** Example ***") - logging.info("example_index: %s" % sent_id) - logging.info("subtokens: %s" % " ".join(sent_subtokens)) - logging.info("sent_label: %s" % sent_label) - logging.info("input_ids: %s" % list2str(input_ids)) - logging.info("input_mask: %s" % list2str(input_mask)) - - self.features.append( - InputFeatures( - sent_id=sent_id, - sent_label=sent_label, - input_ids=input_ids, - input_mask=input_mask, - segment_ids=segment_ids, - ) - ) - - -class InputFeatures(object): - """A single set of features of data.""" - - def __init__(self, sent_id, sent_label, input_ids, input_mask, segment_ids): - self.sent_id = sent_id - self.sent_label = sent_label - self.input_ids = input_ids - self.input_mask = input_mask - self.segment_ids = segment_ids - - -class SentenceClassificationDataDesc: - def __init__(self, dataset_name, data_dir, do_lower_case): - if dataset_name == 'sst-2': - self.data_dir = process_sst_2(data_dir) - self.num_labels = 2 - self.eval_file = self.data_dir + '/dev.tsv' - elif dataset_name == 'imdb': - self.num_labels = 2 - self.data_dir = process_imdb(data_dir, do_lower_case) - self.eval_file = self.data_dir + '/test.tsv' - elif dataset_name == 'thucnews': - self.num_labels = 14 - self.data_dir = process_thucnews(data_dir) - self.eval_file = self.data_dir + '/test.tsv' - elif dataset_name.startswith('nlu-'): - if dataset_name.endswith('chat'): - self.data_dir = f'{data_dir}/ChatbotCorpus.json' - self.num_labels = 2 - elif dataset_name.endswith('ubuntu'): - self.data_dir = f'{data_dir}/AskUbuntuCorpus.json' - self.num_labels = 5 - elif dataset_name.endswith('web'): - data_dir = f'{data_dir}/WebApplicationsCorpus.json' - self.num_labels = 8 - self.data_dir = process_nlu(data_dir, do_lower_case, dataset_name=dataset_name) - self.eval_file = self.data_dir + '/test.tsv' - elif dataset_name.startswith('jarvis'): - self.data_dir = process_jarvis_datasets( - data_dir, do_lower_case, dataset_name, modes=['train', 'test', 'eval'], ignore_prev_intent=False - ) - - intents = get_intent_labels(f'{self.data_dir}/dict.intents.csv') - self.num_labels = len(intents) - else: - raise ValueError( - "Looks like you passed a dataset name that isn't " - "already supported by NeMo. Please make sure " - "that you build the preprocessing method for it." 
- ) - - self.train_file = self.data_dir + '/train.tsv' - - for mode in ['train', 'test', 'eval']: - - if not if_exist(self.data_dir, [f'{mode}.tsv']): - logging.info(f' Stats calculation for {mode} mode' f' is skipped as {mode}.tsv was not found.') - continue - - input_file = f'{self.data_dir}/{mode}.tsv' - with open(input_file, 'r') as f: - input_lines = f.readlines()[1:] # Skipping headers at index 0 - - queries, raw_sentences = [], [] - for input_line in input_lines: - parts = input_line.strip().split() - raw_sentences.append(int(parts[-1])) - queries.append(' '.join(parts[:-1])) - - infold = input_file[: input_file.rfind('/')] - - logging.info(f'Three most popular classes during {mode}ing') - total_sents, sent_label_freq = get_label_stats(raw_sentences, infold + f'/{mode}_sentence_stats.tsv') - - if mode == 'train': - self.class_weights = calc_class_weights(sent_label_freq) - logging.info(f'Class weights are - {self.class_weights}') - - logging.info(f'Total Sentences - {total_sents}') - logging.info(f'Sentence class frequencies - {sent_label_freq}') diff --git a/nemo/collections/nlp/data/datasets/token_classification_dataset.py b/nemo/collections/nlp/data/datasets/token_classification_dataset.py index cac15d50d2c5..b91db4eaabc6 100644 --- a/nemo/collections/nlp/data/datasets/token_classification_dataset.py +++ b/nemo/collections/nlp/data/datasets/token_classification_dataset.py @@ -1,6 +1,7 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. # Copyright 2018 The Google AI Language Team Authors and # The HuggingFace Inc. team. -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,6 +14,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+# ============================================================================= """ Utility functions for Token Classification NLP tasks @@ -23,13 +25,12 @@ import itertools import os import pickle -import random import numpy as np from torch.utils.data import Dataset from nemo import logging -from nemo.collections.nlp.data.datasets import datasets_utils +from nemo.collections.nlp.data.datasets.datasets_utils.data_preprocessing import get_label_stats, get_stats __all__ = ['BertTokenClassificationDataset', 'BertTokenClassificationInferDataset'] @@ -77,7 +78,7 @@ def get_features( words = query.strip().split() # add bos token - subtokens = ['[CLS]'] + subtokens = [tokenizer.cls_token] loss_mask = [1 - ignore_start_end] subtokens_mask = [0] if with_label: @@ -98,7 +99,7 @@ def get_features( if with_label: labels.extend([query_labels[j]] * len(word_tokens)) # add eos token - subtokens.append('[SEP]') + subtokens.append(tokenizer.sep_token) loss_mask.append(1 - ignore_start_end) subtokens_mask.append(0) sent_lengths.append(len(subtokens)) @@ -113,12 +114,12 @@ def get_features( max_seq_length = min(max_seq_length, max(sent_lengths)) logging.info(f'Max length: {max_seq_length}') - datasets_utils.get_stats(sent_lengths) + get_stats(sent_lengths) too_long_count = 0 for i, subtokens in enumerate(all_subtokens): if len(subtokens) > max_seq_length: - subtokens = ['[CLS]'] + subtokens[-max_seq_length + 1 :] + subtokens = [tokenizer.cls_token] + subtokens[-max_seq_length + 1 :] all_input_mask[i] = [1] + all_input_mask[i][-max_seq_length + 1 :] all_loss_mask[i] = [int(not ignore_start_end)] + all_loss_mask[i][-max_seq_length + 1 :] all_subtokens_mask[i] = [0] + all_subtokens_mask[i][-max_seq_length + 1 :] @@ -127,7 +128,7 @@ def get_features( all_labels[i] = [pad_id] + all_labels[i][-max_seq_length + 1 :] too_long_count += 1 - all_input_ids.append([tokenizer.tokens_to_ids(t) for t in subtokens]) + all_input_ids.append(tokenizer.tokens_to_ids(subtokens)) if len(subtokens) < max_seq_length: extra = max_seq_length - len(subtokens) @@ -144,14 +145,14 @@ def get_features( logging.warning(f'{too_long_count} are longer than {max_seq_length}') for i in range(min(len(all_input_ids), 5)): - logging.debug("*** Example ***") - logging.debug("i: %s", i) - logging.debug("subtokens: %s", " ".join(list(map(str, all_subtokens[i])))) - logging.debug("loss_mask: %s", " ".join(list(map(str, all_loss_mask[i])))) - logging.debug("input_mask: %s", " ".join(list(map(str, all_input_mask[i])))) - logging.debug("subtokens_mask: %s", " ".join(list(map(str, all_subtokens_mask[i])))) + logging.info("*** Example ***") + logging.info("i: %s", i) + logging.info("subtokens: %s", " ".join(list(map(str, all_subtokens[i])))) + logging.info("loss_mask: %s", " ".join(list(map(str, all_loss_mask[i])))) + logging.info("input_mask: %s", " ".join(list(map(str, all_input_mask[i])))) + logging.info("subtokens_mask: %s", " ".join(list(map(str, all_subtokens_mask[i])))) if with_label: - logging.debug("labels: %s", " ".join(list(map(str, all_labels[i])))) + logging.info("labels: %s", " ".join(list(map(str, all_labels[i])))) return (all_input_ids, all_segment_ids, all_input_mask, all_loss_mask, all_subtokens_mask, all_labels) @@ -175,7 +176,6 @@ class BertTokenClassificationDataset(Dataset): tokenizer (Tokenizer): such as NemoBertTokenizer num_samples (int): number of samples you want to use for the dataset. If -1, use all dataset. Useful for testing. - shuffle (bool): whether to shuffle your data. pad_label (str): pad value use for labels. 
by default, it's the neutral label. label_ids (dict): label_ids (dict): dict to map labels to label ids. @@ -196,7 +196,6 @@ def __init__( max_seq_length, tokenizer, num_samples=-1, - shuffle=False, pad_label='O', label_ids=None, ignore_extra_tokens=False, @@ -212,7 +211,11 @@ def __init__( if not filename.endswith('.txt'): raise ValueError("{text_file} should have extension .txt") - features_pkl = os.path.join(data_dir, filename[:-4] + "_features.pkl") + tokenizer_type = type(tokenizer.tokenizer).__name__ + vocab_size = getattr(tokenizer, "vocab_size", 0) + features_pkl = os.path.join( + data_dir, "cached_{}_{}_{}_{}".format(filename, tokenizer_type, str(max_seq_length), str(vocab_size)), + ) label_ids_pkl = os.path.join(data_dir, "label_ids.pkl") if use_cache and os.path.exists(features_pkl) and os.path.exists(label_ids_pkl): @@ -241,12 +244,9 @@ def __init__( if len(labels_lines) != len(text_lines): raise ValueError("Labels file should contain labels for every word") - if shuffle or num_samples > 0: + if num_samples > 0: dataset = list(zip(text_lines, labels_lines)) - random.shuffle(dataset) - - if num_samples > 0: - dataset = dataset[:num_samples] + dataset = dataset[:num_samples] dataset = list(zip(*dataset)) text_lines = dataset[0] @@ -308,7 +308,7 @@ def __init__( infold = text_file[: text_file.rfind('/')] merged_labels = itertools.chain.from_iterable(self.all_labels) logging.info('Three most popular labels') - _, self.label_frequencies = datasets_utils.get_label_stats(merged_labels, infold + '/label_stats.tsv') + _, self.label_frequencies, _ = get_label_stats(merged_labels, infold + '/label_stats.tsv') # save label_ids out = open(infold + '/label_ids.csv', 'w') diff --git a/nemo/collections/nlp/data/tokenizers/__init__.py b/nemo/collections/nlp/data/tokenizers/__init__.py index fb7847c44171..66961c8cd800 100644 --- a/nemo/collections/nlp/data/tokenizers/__init__.py +++ b/nemo/collections/nlp/data/tokenizers/__init__.py @@ -18,5 +18,6 @@ from nemo.collections.nlp.data.tokenizers.char_tokenizer import CharTokenizer from nemo.collections.nlp.data.tokenizers.gpt2_tokenizer import NemoGPT2Tokenizer from nemo.collections.nlp.data.tokenizers.sentencepiece_tokenizer import SentencePieceTokenizer +from nemo.collections.nlp.data.tokenizers.tokenizer_utils import * from nemo.collections.nlp.data.tokenizers.word_tokenizer import WordTokenizer from nemo.collections.nlp.data.tokenizers.youtokentome_tokenizer import YouTokenToMeTokenizer diff --git a/nemo/collections/nlp/data/tokenizers/bert_tokenizer.py b/nemo/collections/nlp/data/tokenizers/bert_tokenizer.py index 61f86c5d41a3..b4862cfe7370 100644 --- a/nemo/collections/nlp/data/tokenizers/bert_tokenizer.py +++ b/nemo/collections/nlp/data/tokenizers/bert_tokenizer.py @@ -16,9 +16,9 @@ import re -from transformers import AlbertTokenizer, BertTokenizer, RobertaTokenizer - +from nemo import logging from nemo.collections.nlp.data.tokenizers.tokenizer_spec import TokenizerSpec +from nemo.collections.nlp.data.tokenizers.tokenizer_utils import MODEL_SPECIAL_TOKENS, TOKENIZERS __all__ = [ 'NemoBertTokenizer', @@ -62,41 +62,42 @@ def remove_spaces(text): class NemoBertTokenizer(TokenizerSpec): - def __init__( - self, - pretrained_model=None, - vocab_file=None, - bert_derivate='bert', - special_tokens={ - "unk_token": "[UNK]", - "sep_token": "[SEP]", - "eos_token": "[SEP]", - "pad_token": "[PAD]", - "cls_token": "[CLS]", - "bos_token": "[CLS]", - "mask_token": "[MASK]", - }, - do_lower_case=True, - ): - - if bert_derivate == 'bert': - tokenizer_cls = 
BertTokenizer - elif bert_derivate == 'albert': - tokenizer_cls = AlbertTokenizer - elif bert_derivate == 'roberta': - tokenizer_cls = RobertaTokenizer - if pretrained_model is not None: + def __init__(self, pretrained_model=None, vocab_file=None, bert_derivative='bert', do_lower_case=False): + ''' + The user needs to specify pretrained_model name or vocab file and bert_derivative + + pretrained_model (str):name of the pretrained model from the hugging face list, + for example: bert-base-cased + To see the list of pretrained models, call: + huggingface_utils.get_bert_models_list() + vocab_file: File containing the vocabulary. + bert_derivative: for example: 'bert', 'roberta', 'albert'. Only used when vocab_file specified. + ''' + if pretrained_model: + bert_derivative = pretrained_model.split('-')[0] + logging.info(f'Deriving bert model type from pretrained model name.') + + if bert_derivative in TOKENIZERS: + tokenizer_cls = TOKENIZERS[bert_derivative] + else: + raise ValueError( + "Bert_derivative value {bert_derivative} is not currently supported" + + " Please choose from the following list: {TOKENIZERS.keys()}" + ) + + if pretrained_model: self.tokenizer = tokenizer_cls.from_pretrained(pretrained_model) - elif vocab_file is not None: - self.tokenizer = tokenizer_cls(vocab_file=vocab_file) + elif vocab_file: + self.tokenizer = tokenizer_cls(vocab_file=vocab_file, do_lower_case=do_lower_case) else: raise ValueError("either 'vocab_file' or 'pretrained_model' has to be specified") if hasattr(self.tokenizer, "vocab"): self.vocab_size = len(self.tokenizer.vocab) + + special_tokens = MODEL_SPECIAL_TOKENS[bert_derivative] for k, v in special_tokens.items(): setattr(self, k, v) - self.never_split = tuple(special_tokens.values()) def text_to_tokens(self, text): diff --git a/nemo/collections/nlp/data/tokenizers/tokenizer_spec.py b/nemo/collections/nlp/data/tokenizers/tokenizer_spec.py index fffe53a5ca6c..ab587e86098f 100644 --- a/nemo/collections/nlp/data/tokenizers/tokenizer_spec.py +++ b/nemo/collections/nlp/data/tokenizers/tokenizer_spec.py @@ -46,4 +46,4 @@ def ids_to_text(self, ids): pass def add_special_tokens(self, special_tokens: List[str]): - pass + raise NotImplementedError("To be implemented") diff --git a/nemo/collections/nlp/data/tokenizers/tokenizer_utils.py b/nemo/collections/nlp/data/tokenizers/tokenizer_utils.py new file mode 100644 index 000000000000..0cdfb7e46bfe --- /dev/null +++ b/nemo/collections/nlp/data/tokenizers/tokenizer_utils.py @@ -0,0 +1,121 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================= +import os + +from transformers import AlbertTokenizer, BertTokenizer, RobertaTokenizer + +import nemo +from nemo.utils import logging + +try: + __megatron_utils_satisfied = True + from nemo.collections.nlp.nm.trainables.common.megatron.megatron_utils import ( + get_megatron_vocab_file, + is_lower_cased_megatron, + ) + +except Exception as e: + logging.error('Failed to import Megatron utils: `{}` ({})'.format(str(e), type(e))) + __megatron_utils_satisfied = False + + +__all__ = ['MODEL_SPECIAL_TOKENS', 'TOKENIZERS', 'get_tokenizer', 'get_bert_special_tokens'] + +MODEL_SPECIAL_TOKENS = { + 'bert': { + 'unk_token': '[UNK]', + 'sep_token': '[SEP]', + 'pad_token': '[PAD]', + 'bos_token': '[CLS]', + 'mask_token': '[MASK]', + 'eos_token': '[SEP]', + 'cls_token': '[CLS]', + }, + 'roberta': { + 'unk_token': '', + 'sep_token': '', + 'pad_token': '', + 'bos_token': '', + 'mask_token': '', + 'eos_token': '', + 'cls_token': '', + }, + 'albert': { + 'unk_token': '', + 'sep_token': '[SEP]', + 'pad_token': '', + 'bos_token': '[CLS]', + 'mask_token': '[MASK]', + 'eos_token': '[SEP]', + 'cls_token': '[CLS]', + }, +} + + +TOKENIZERS = {'bert': BertTokenizer, 'albert': AlbertTokenizer, 'roberta': RobertaTokenizer} + + +def get_bert_special_tokens(bert_derivative): + return MODEL_SPECIAL_TOKENS[bert_derivative] + + +def get_tokenizer( + tokenizer_name, + pretrained_model_name, + tokenizer_model=None, + special_tokens=None, + vocab_file=None, + do_lower_case=False, +): + ''' + Args: + tokenizer_name: sentencepiece or nemobert + pretrained_mode_name ('str'): name of the pretrained model from the hugging face list or 'megatron', + for example: bert-base-cased + To see the list of pretrained models, use: nemo_nlp.nm.trainables.get_bert_models_list() + tokenizer_model (path): only used for sentencepiece tokenizer + special_tokens (dict): dict of special tokens (Optional) + vocab_file (str): path to vocab file + do_lower_case (bool): (whether to apply lower cased) - only applicable when tokenizer is build with vocab file + ''' + # Check if we can use Megatron utils. 
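+    # Megatron checkpoints ship their own vocab file and casing convention, so they
+    # are resolved here before the generic tokenizer_name dispatch below.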
+ if __megatron_utils_satisfied: + if 'megatron' in pretrained_model_name: + do_lower_case = is_lower_cased_megatron(pretrained_model_name) + vocab_file = get_megatron_vocab_file(pretrained_model_name) + return nemo.collections.nlp.data.tokenizers.NemoBertTokenizer( + vocab_file=vocab_file, do_lower_case=do_lower_case + ) + + if tokenizer_name == 'nemobert': + tokenizer = nemo.collections.nlp.data.tokenizers.NemoBertTokenizer( + pretrained_model=pretrained_model_name, vocab_file=vocab_file, do_lower_case=do_lower_case + ) + elif tokenizer_name == 'sentencepiece': + if not os.path.exists(tokenizer_model): + raise FileNotFoundError(f'{tokenizer_model} tokenizer model not found') + + tokenizer = nemo.collections.nlp.data.tokenizers.SentencePieceTokenizer(model_path=tokenizer_model) + model_type = pretrained_model_name.split('-')[0] + if special_tokens is None: + if model_type not in MODEL_SPECIAL_TOKENS: + logging.info(f'No special tokens found for {model_type}.') + else: + special_tokens = MODEL_SPECIAL_TOKENS[model_type] + tokenizer.add_special_tokens(special_tokens) + else: + raise ValueError(f'{tokenizer_name} is not supported') + return tokenizer diff --git a/nemo/collections/nlp/metrics/bleu.py b/nemo/collections/nlp/metrics/bleu.py index bab9c5f4c0f6..a49eb0a8c10b 100644 --- a/nemo/collections/nlp/metrics/bleu.py +++ b/nemo/collections/nlp/metrics/bleu.py @@ -1,3 +1,20 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +# ============================================================================= # Copyright 2017 Google Inc. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,6 +29,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== + """Python implementation of BLEU and smooth-BLEU. This module provides a Python implementation of BLEU and smooth-BLEU. Smooth BLEU is computed following the method outlined in the paper: diff --git a/nemo/collections/nlp/metrics/sacrebleu.py b/nemo/collections/nlp/metrics/sacrebleu.py index 586b19bf2d30..5130dd9633ca 100755 --- a/nemo/collections/nlp/metrics/sacrebleu.py +++ b/nemo/collections/nlp/metrics/sacrebleu.py @@ -1,6 +1,23 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +# ============================================================================= # Copyright 2017--2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You may not @@ -13,6 +30,8 @@ # an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the License for the specific language governing # permissions and limitations under the License. +# ============================================================================= + """ SacreBLEU provides hassle-free computation of shareable, comparable, and reproducible BLEU scores. Inspired by Rico Sennrich's `multi-bleu-detok.perl`, it produces the official WMT scores but works with plain text. diff --git a/nemo/collections/nlp/metrics/squad_metrics.py b/nemo/collections/nlp/metrics/squad_metrics.py index e5f0af1e2517..07e5cdb6d841 100644 --- a/nemo/collections/nlp/metrics/squad_metrics.py +++ b/nemo/collections/nlp/metrics/squad_metrics.py @@ -1,27 +1,27 @@ -""" -Copyright 2018 The Google AI Language Team Authors and -The HuggingFace Inc. team. -Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# Copyright 2018 The Google AI Language Team Authors and +# The HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================= import collections from transformers.tokenization_bert import BasicTokenizer from nemo import logging -from nemo.collections.nlp.data.datasets.datasets_utils import get_tokens, normalize_answer +from nemo.collections.nlp.utils.data_utils import get_tokens, normalize_answer __all__ = [ 'f1_score', @@ -31,7 +31,6 @@ 'merge_eval', 'find_all_best_thresh', 'find_best_thresh', - 'normalize_answer', '_get_best_indexes', 'get_final_text', ] diff --git a/nemo/collections/nlp/nm/data_layers/__init__.py b/nemo/collections/nlp/nm/data_layers/__init__.py index 5b5d3dde539f..0c42605631ac 100644 --- a/nemo/collections/nlp/nm/data_layers/__init__.py +++ b/nemo/collections/nlp/nm/data_layers/__init__.py @@ -14,6 +14,7 @@ # limitations under the License. # ============================================================================= +from nemo.collections.nlp.nm.data_layers.bert_inference_datalayer import * from nemo.collections.nlp.nm.data_layers.glue_benchmark_datalayer import * from nemo.collections.nlp.nm.data_layers.joint_intent_slot_datalayer import * from nemo.collections.nlp.nm.data_layers.lm_bert_datalayer import * @@ -21,6 +22,7 @@ from nemo.collections.nlp.nm.data_layers.machine_translation_datalayer import * from nemo.collections.nlp.nm.data_layers.punctuation_capitalization_datalayer import * from nemo.collections.nlp.nm.data_layers.qa_squad_datalayer import * +from nemo.collections.nlp.nm.data_layers.state_tracking_sgd_datalayer import * from nemo.collections.nlp.nm.data_layers.state_tracking_trade_datalayer import * from nemo.collections.nlp.nm.data_layers.text_classification_datalayer import * from nemo.collections.nlp.nm.data_layers.text_datalayer import * diff --git a/nemo/collections/nlp/nm/data_layers/bert_inference_datalayer.py b/nemo/collections/nlp/nm/data_layers/bert_inference_datalayer.py new file mode 100644 index 000000000000..2da78552084d --- /dev/null +++ b/nemo/collections/nlp/nm/data_layers/bert_inference_datalayer.py @@ -0,0 +1,68 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +from nemo.collections.nlp.nm.data_layers.text_datalayer import TextDataLayer +from nemo.core import ChannelType, NeuralType +from nemo.utils.decorators import add_port_docs + +__all__ = ['BertInferDataLayer'] + + +class BertInferDataLayer(TextDataLayer): + """ + Data layer to run infernce with BERT (get final hidden layer). 
+ + Args: + tokenizer (TokenizerSpec): tokenizer + dataset (str): directory or a single file with dataset documents + max_seq_length (int): maximum allowed length of the text segments + mask_probability (float): probability of masking input sequence tokens + batch_size (int): batch size in segments + short_seeq_prob (float): Probability of creating sequences which are + shorter than the maximum length. + Defualts to 0.1. + """ + + @property + @add_port_docs() + def output_ports(self): + """Returns definitions of module output ports. + + input_ids: indices of tokens which constitute batches of text segments + 0: AxisType(BatchTag) + + 1: AxisType(TimeTag) + + input_type_ids: indices of token types (e.g., sentences A & B in BERT) + 0: AxisType(BatchTag) + + 1: AxisType(TimeTag) + + input_mask: bool tensor with 0s in place of tokens to be masked + 0: AxisType(BatchTag) + + 1: AxisType(TimeTag) + + """ + return { + "input_ids": NeuralType(('B', 'T'), ChannelType()), + "input_type_ids": NeuralType(('B', 'T'), ChannelType()), + "input_mask": NeuralType(('B', 'T'), ChannelType()), + } + + def __init__(self, dataset_type, dataset_params, batch_size=1, shuffle=False): + + super().__init__(dataset_type, dataset_params, batch_size=batch_size, shuffle=shuffle) diff --git a/nemo/collections/nlp/nm/data_layers/glue_benchmark_datalayer.py b/nemo/collections/nlp/nm/data_layers/glue_benchmark_datalayer.py index ac5ae86cca6c..863577165203 100644 --- a/nemo/collections/nlp/nm/data_layers/glue_benchmark_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/glue_benchmark_datalayer.py @@ -17,6 +17,7 @@ from nemo.collections.nlp.data import GLUEDataset from nemo.collections.nlp.nm.data_layers.text_datalayer import TextDataLayer from nemo.core import CategoricalValuesType, ChannelType, NeuralType, RegressionValuesType +from nemo.utils.decorators import add_port_docs __all__ = ['GlueClassificationDataLayer', 'GlueRegressionDataLayer'] @@ -29,19 +30,32 @@ class GlueClassificationDataLayer(TextDataLayer): All the data processing is done in GLUEDataset. Args: + data_dir (str): data directory path + tokenizer (TokenizerSpec): text tokenizer. + max_seq_length (int): maximum allowed length of the text segments . + processor (DataProcessor): data processor. + evaluate (bool): true if data layer is used for evaluation. Default: False. + batch_size (int): batch size in segments + shuffle (bool): whether to shuffle data or not. Default: False. dataset_type (GLUEDataset): the dataset that needs to be converted to DataLayerNM """ @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. 
+ + input_ids: + indices of tokens which constitute batches of text segments + input_type_ids: + tensor with 0's and 1's to denote the text segment type + input_mask: + bool tensor with 0s in place of tokens to be masked + labels: + integer indices for sentence classication prediction """ return { - # "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "labels": NeuralType({0: AxisType(CategoricalTag)}), "input_ids": NeuralType(('B', 'T'), ChannelType()), "input_type_ids": NeuralType(('B', 'T'), ChannelType()), "input_mask": NeuralType(('B', 'T'), ChannelType()), @@ -55,19 +69,19 @@ def __init__( max_seq_length, processor, evaluate=False, - token_params={}, shuffle=False, batch_size=64, dataset_type=GLUEDataset, + use_data_cache=False, ): dataset_params = { 'data_dir': data_dir, 'output_mode': 'classification', 'processor': processor, 'evaluate': evaluate, - 'token_params': token_params, 'tokenizer': tokenizer, 'max_seq_length': max_seq_length, + 'use_data_cache': use_data_cache, } super().__init__(dataset_type, dataset_params, batch_size, shuffle) @@ -80,19 +94,32 @@ class GlueRegressionDataLayer(TextDataLayer): All the data processing is done in GLUEDataset. Args: + data_dir (str): data directory path + tokenizer (TokenizerSpec): text tokenizer. + max_seq_length (int): maximum allowed length of the text segments . + processor (DataProcessor): data processor. + evaluate (bool): true if data layer is used for evaluation. Default: False. + batch_size (int): batch size in segments + shuffle (bool): whether to shuffle data or not. Default: False. dataset_type (GLUEDataset): the dataset that needs to be converted to DataLayerNM """ @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. 
+ + input_ids: + indices of tokens which constitute batches of text segments + input_type_ids: + tensor with 0's and 1's to denote the text segment type + input_mask: + bool tensor with 0s in place of tokens to be masked + labels: + float for sentence regression prediction """ return { - # "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "labels": NeuralType({0: AxisType(RegressionTag)}), "input_ids": NeuralType(('B', 'T'), ChannelType()), "input_type_ids": NeuralType(('B', 'T'), ChannelType()), "input_mask": NeuralType(('B', 'T'), ChannelType()), @@ -106,19 +133,19 @@ def __init__( max_seq_length, processor, evaluate=False, - token_params={}, shuffle=False, batch_size=64, dataset_type=GLUEDataset, + use_data_cache=False, ): dataset_params = { 'data_dir': data_dir, 'output_mode': 'regression', 'processor': processor, 'evaluate': evaluate, - 'token_params': token_params, 'tokenizer': tokenizer, 'max_seq_length': max_seq_length, + 'use_data_cache': use_data_cache, } super().__init__(dataset_type, dataset_params, batch_size, shuffle) diff --git a/nemo/collections/nlp/nm/data_layers/joint_intent_slot_datalayer.py b/nemo/collections/nlp/nm/data_layers/joint_intent_slot_datalayer.py index c306cfcccc04..2ca627eb20d7 100644 --- a/nemo/collections/nlp/nm/data_layers/joint_intent_slot_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/joint_intent_slot_datalayer.py @@ -16,7 +16,8 @@ from nemo.collections.nlp.data import BertJointIntentSlotDataset, BertJointIntentSlotInferDataset from nemo.collections.nlp.nm.data_layers.text_datalayer import TextDataLayer -from nemo.core import ChannelType, NeuralType +from nemo.core import ChannelType, LabelsType, MaskType, NeuralType +from nemo.utils.decorators import add_port_docs __all__ = ['BertJointIntentSlotDataLayer', 'BertJointIntentSlotInferDataLayer'] @@ -28,37 +29,53 @@ class BertJointIntentSlotDataLayer(TextDataLayer): All the data processing is done in BertJointIntentSlotDataset. - input_mask: used to ignore some of the input tokens like paddings - - loss_mask: used to mask and ignore tokens in the loss function - - subtokens_mask: used to ignore the outputs of unwanted tokens in - the inference and evaluation like the start and end tokens - Args: - dataset (BertJointIntentSlotDataset): + input_file (str): + data file + slot_file (str): + file to slot labels, each line corresponding to + slot labels for a sentence in input_file. No header. + pad_label (int): pad value use for slot labels + tokenizer (TokenizerSpec): text tokenizer. + max_seq_length (int): + max sequence length minus 2 for [CLS] and [SEP] + dataset_type (BertJointIntentSlotDataset): the dataset that needs to be converted to DataLayerNM + shuffle (bool): whether to shuffle data or not. Default: False. + batch_size: text segments batch size + ignore_extra_tokens (bool): whether or not to ignore extra tokens + ignore_start_end (bool)": whether or not to ignore start and end """ @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. 
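A hedged usage sketch of the reworked GLUE data layers: `token_params` is gone and caching is opted into with `use_data_cache`. Paths, sizes, and the processor placeholder are illustrative; keyword arguments follow the constructors shown above.

```python
# Illustrative sketch only: GLUE classification data layer after this change.
import nemo.collections.nlp as nemo_nlp
from nemo.collections.nlp.nm.data_layers import GlueClassificationDataLayer

tokenizer = nemo_nlp.data.tokenizers.NemoBertTokenizer(pretrained_model='bert-base-uncased')
processor = None  # replace with the GLUE DataProcessor for the chosen task (placeholder)

train_data = GlueClassificationDataLayer(
    data_dir='/data/glue/MRPC',   # placeholder path
    tokenizer=tokenizer,
    max_seq_length=128,
    processor=processor,
    evaluate=False,
    shuffle=True,
    batch_size=32,
    use_data_cache=True,          # new argument enabling dataset caching
)
```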
+ + input_ids: + indices of tokens which constitute batches of text segments + input_type_ids: + tensor with 0's and 1's to denote the text segment type + input_mask: + bool tensor with 0s in place of tokens to be masked + loss_mask: + used to mask and ignore tokens in the loss function + subtokens_mask: + used to ignore the outputs of unwanted tokens in + the inference and evaluation like the start and end tokens + intents: + intents labels + slots: + slots labels """ return { - # "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "loss_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "subtokens_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "intents": NeuralType({0: AxisType(BatchTag)}), - # "slots": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), "input_ids": NeuralType(('B', 'T'), ChannelType()), "input_type_ids": NeuralType(('B', 'T'), ChannelType()), "input_mask": NeuralType(('B', 'T'), ChannelType()), - "loss_mask": NeuralType(('B', 'T'), ChannelType()), + "loss_mask": NeuralType(('B', 'T'), MaskType()), "subtokens_mask": NeuralType(('B', 'T'), ChannelType()), - "intents": NeuralType(tuple('B'), ChannelType()), - "slots": NeuralType(('B', 'T'), ChannelType()), + "intents": NeuralType(tuple('B'), LabelsType()), + "slots": NeuralType(('B', 'T'), LabelsType()), } def __init__( @@ -73,6 +90,7 @@ def __init__( batch_size=64, ignore_extra_tokens=False, ignore_start_end=False, + do_lower_case=False, dataset_type=BertJointIntentSlotDataset, ): dataset_params = { @@ -82,11 +100,11 @@ def __init__( 'tokenizer': tokenizer, 'max_seq_length': max_seq_length, 'num_samples': num_samples, - 'shuffle': shuffle, 'ignore_extra_tokens': ignore_extra_tokens, 'ignore_start_end': ignore_start_end, + 'do_lower_case': do_lower_case, } - super().__init__(dataset_type, dataset_params, batch_size, shuffle) + super().__init__(dataset_type, dataset_params, batch_size, shuffle=shuffle) class BertJointIntentSlotInferDataLayer(TextDataLayer): @@ -96,28 +114,36 @@ class BertJointIntentSlotInferDataLayer(TextDataLayer): All the data processing is done in BertJointIntentSlotInferDataset. - input_mask: used to ignore some of the input tokens like paddings - - loss_mask: used to mask and ignore tokens in the loss function - - subtokens_mask: used to ignore the outputs of unwanted tokens in - the inference and evaluation like the start and end tokens - Args: - dataset (BertJointIntentSlotInferDataset): + queries (list): list of queries for inference + tokenizer (TokenizerSpec): text tokenizer. + max_seq_length (int): + max sequence length minus 2 for [CLS] and [SEP] + dataset_type (BertJointIntentSlotDataset): the dataset that needs to be converted to DataLayerNM + shuffle (bool): whether to shuffle data or not. Default: False. + do_lower_case (bool): whether to make the sentence all lower case + batch_size: text segments batch size """ @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. 
+ + input_ids: + indices of tokens which constitute batches of text segments + input_type_ids: + tensor with 0's and 1's to denote the text segment type + input_mask: + bool tensor with 0s in place of tokens to be masked + loss_mask: + used to mask and ignore tokens in the loss function + subtokens_mask: + used to ignore the outputs of unwanted tokens in + the inference and evaluation like the start and end tokens """ return { - # "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "loss_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "subtokens_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), "input_ids": NeuralType(('B', 'T'), ChannelType()), "input_type_ids": NeuralType(('B', 'T'), ChannelType()), "input_mask": NeuralType(('B', 'T'), ChannelType()), @@ -125,6 +151,20 @@ def output_ports(self): "subtokens_mask": NeuralType(('B', 'T'), ChannelType()), } - def __init__(self, queries, tokenizer, max_seq_length, batch_size=1, dataset_type=BertJointIntentSlotInferDataset): - dataset_params = {'queries': queries, 'tokenizer': tokenizer, 'max_seq_length': max_seq_length} - super().__init__(dataset_type, dataset_params, batch_size, shuffle=False) + def __init__( + self, + queries, + tokenizer, + max_seq_length, + batch_size=1, + shuffle=False, + do_lower_case=False, + dataset_type=BertJointIntentSlotInferDataset, + ): + dataset_params = { + 'queries': queries, + 'tokenizer': tokenizer, + 'max_seq_length': max_seq_length, + 'do_lower_case': do_lower_case, + } + super().__init__(dataset_type, dataset_params, batch_size, shuffle=shuffle) diff --git a/nemo/collections/nlp/nm/data_layers/lm_bert_datalayer.py b/nemo/collections/nlp/nm/data_layers/lm_bert_datalayer.py index 98c1ba23c10f..8a8e19aaccdd 100644 --- a/nemo/collections/nlp/nm/data_layers/lm_bert_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/lm_bert_datalayer.py @@ -25,14 +25,15 @@ from nemo.backends.pytorch import DataLayerNM from nemo.collections.nlp.data import BertPretrainingDataset, BertPretrainingPreprocessedDataset from nemo.collections.nlp.nm.data_layers.text_datalayer import TextDataLayer -from nemo.core import ChannelType, LabelsType, NeuralType +from nemo.core import ChannelType, LabelsType, MaskType, NeuralType +from nemo.utils.decorators import add_port_docs __all__ = ['BertPretrainingDataLayer', 'BertPretrainingPreprocessedDataLayer'] class BertPretrainingDataLayer(TextDataLayer): """ - Data layer for masked language modeling task. + Data layer for masked language modeling task for text data. Args: tokenizer (TokenizerSpec): tokenizer @@ -42,29 +43,36 @@ class BertPretrainingDataLayer(TextDataLayer): batch_size (int): batch size in segments short_seeq_prob (float): Probability of creating sequences which are shorter than the maximum length. - Defualts to 0.1. + Defaults to 0.1. + shuffle (bool): whether to shuffle data or not. Default: False. """ @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. 
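A small sketch of the inference-time intent/slot data layer with the new `shuffle` and `do_lower_case` arguments; the queries and model name are illustrative.

```python
# Illustrative sketch only: inference data layer for joint intent and slot filling.
import nemo.collections.nlp as nemo_nlp
from nemo.collections.nlp.nm.data_layers import BertJointIntentSlotInferDataLayer

tokenizer = nemo_nlp.data.tokenizers.NemoBertTokenizer(pretrained_model='bert-base-uncased')

infer_data = BertJointIntentSlotInferDataLayer(
    queries=['set an alarm for seven am', 'play some jazz'],  # placeholder queries
    tokenizer=tokenizer,
    max_seq_length=64,
    batch_size=2,
    do_lower_case=True,   # new argument: lower-case the queries before tokenization
)
```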
+ input_ids: + indices of tokens which constitute batches of masked text segments + input_type_ids: + tensor with 0's and 1's to denote the text segment type + input_mask: + bool tensor with 0s in place of tokens to be masked + output_ids: indices of tokens which constitute batches of unmasked text segments + output_mask: bool tensor with 0s in place of tokens to be masked + labels: 0 or 1 for next sentence prediction classification """ return { - # "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "output_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "output_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "labels": NeuralType({0: AxisType(BatchTag)}), "input_ids": NeuralType(('B', 'T'), ChannelType()), "input_type_ids": NeuralType(('B', 'T'), ChannelType()), "input_mask": NeuralType(('B', 'T'), ChannelType()), - "output_ids": NeuralType(('B', 'T'), ChannelType()), - "output_mask": NeuralType(('B', 'T'), ChannelType()), + "output_ids": NeuralType(('B', 'T'), LabelsType()), + "output_mask": NeuralType(('B', 'T'), MaskType()), "labels": NeuralType(tuple('B'), LabelsType()), } - def __init__(self, tokenizer, dataset, max_seq_length, mask_probability, short_seq_prob=0.1, batch_size=64): + def __init__( + self, tokenizer, dataset, max_seq_length, mask_probability, short_seq_prob=0.1, batch_size=64, shuffle=False + ): dataset_params = { 'tokenizer': tokenizer, 'dataset': dataset, @@ -72,45 +80,47 @@ def __init__(self, tokenizer, dataset, max_seq_length, mask_probability, short_s 'mask_probability': mask_probability, 'short_seq_prob': short_seq_prob, } - super().__init__(BertPretrainingDataset, dataset_params, batch_size, shuffle=False) + super().__init__(BertPretrainingDataset, dataset_params, batch_size, shuffle=shuffle) class BertPretrainingPreprocessedDataLayer(DataLayerNM): """ - Data layer for masked language modeling task. + Data layer for masked language modeling task for preprocessed data. Args: - tokenizer (TokenizerSpec): tokenizer dataset (str): directory or a single file with dataset documents max_seq_length (int): maximum allowed length of the text segments - mask_probability (float): probability of masking input sequence tokens batch_size (int): batch size in segments - short_seeq_prob (float): Probability of creating sequences which are - shorter than the maximum length. - Defualts to 0.1. + mode (str): model execution mode, e.g. "training" """ @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. 
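A sketch of the text-based pretraining data layer with the new `shuffle` flag; the dataset path and hyperparameters are placeholders.

```python
# Illustrative sketch only: masked-LM pretraining data layer over raw text.
import nemo.collections.nlp as nemo_nlp
from nemo.collections.nlp.nm.data_layers import BertPretrainingDataLayer

tokenizer = nemo_nlp.data.tokenizers.NemoBertTokenizer(pretrained_model='bert-base-uncased')

train_data = BertPretrainingDataLayer(
    tokenizer=tokenizer,
    dataset='/data/wiki/train.txt',   # placeholder path to the text corpus
    max_seq_length=128,
    mask_probability=0.15,
    short_seq_prob=0.1,
    batch_size=64,
    shuffle=True,                     # new argument; shuffling was previously hard-coded off
)
```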
+ input_ids: + indices of tokens which constitute batches of masked text segments + input_type_ids: + tensor with 0's and 1's to denote the text segment type + input_mask: + bool tensor with 0s in place of tokens to be masked + output_ids: indices of tokens which constitute batches of unmasked text segments + output_mask: bool tensor with 0s in place of tokens to be masked + labels: 0 or 1 for next sentence prediction classification """ return { - # "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "output_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "output_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "labels": NeuralType({0: AxisType(BatchTag)}), "input_ids": NeuralType(('B', 'T'), ChannelType()), "input_type_ids": NeuralType(('B', 'T'), ChannelType()), "input_mask": NeuralType(('B', 'T'), ChannelType()), - "output_ids": NeuralType(('B', 'T'), ChannelType()), - "output_mask": NeuralType(('B', 'T'), ChannelType()), + "output_ids": NeuralType(('B', 'T'), LabelsType()), + "output_mask": NeuralType(('B', 'T'), MaskType()), "labels": NeuralType(tuple('B'), LabelsType()), } - def __init__(self, dataset, max_pred_length, batch_size=64, training=True): - + def __init__( + self, dataset, max_pred_length, mode, batch_size=64, + ): + super().__init__() if os.path.isdir(dataset): self.files = [ os.path.join(dataset, f) for f in os.listdir(dataset) if os.path.isfile(os.path.join(dataset, f)) @@ -121,14 +131,13 @@ def __init__(self, dataset, max_pred_length, batch_size=64, training=True): self.num_files = len(self.files) self._batch_size = batch_size self.max_pred_length = max_pred_length - self.training = training + self.mode = mode total_length = 0 for f in self.files: fp = h5py.File(f, 'r') total_length += len(fp['input_ids']) fp.close() self.total_length = total_length - super().__init__() def _collate_fn(self, x): num_components = len(x[0]) @@ -156,7 +165,7 @@ def dataset(self): @property def data_iterator(self): while True: - if self.training: + if self.mode == "train": random.shuffle(self.files) for f_id in range(self.num_files): data_file = self.files[f_id] @@ -173,3 +182,5 @@ def data_iterator(self): ) for x in train_dataloader: yield x + if self.mode != "train": + break diff --git a/nemo/collections/nlp/nm/data_layers/lm_transformer_datalayer.py b/nemo/collections/nlp/nm/data_layers/lm_transformer_datalayer.py index ebd1b2a738d0..7c9df0695991 100644 --- a/nemo/collections/nlp/nm/data_layers/lm_transformer_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/lm_transformer_datalayer.py @@ -17,6 +17,7 @@ from nemo.collections.nlp.data import LanguageModelingDataset from nemo.collections.nlp.nm.data_layers.text_datalayer import TextDataLayer from nemo.core import ChannelType, LabelsType, NeuralType +from nemo.utils.decorators import add_port_docs __all__ = ['LanguageModelingDataLayer'] @@ -29,42 +30,40 @@ class LanguageModelingDataLayer(TextDataLayer): dataset (str): path to text document with data tokenizer (TokenizerSpec): tokenizer max_seq_length (int): maximum allowed length of the text segments + batch_size (int): batch size batch_step (int): how many tokens to skip between two successive segments of text when constructing batches + dataset_type (Dataset): + the underlying dataset. 
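The `data_iterator` change above replaces the boolean `training` flag with a `mode` string: file order is reshuffled and iteration repeats indefinitely only in training, while any other mode makes a single pass. A stripped-down sketch of that control flow (the file loader is a placeholder, not the module's actual code):

```python
import random

def iterate_files(files, mode, load_examples):
    """Yield examples file by file; loop forever only when mode == 'train' (illustrative)."""
    while True:
        if mode == "train":
            random.shuffle(files)                 # new epoch: reshuffle the file order
        for path in files:
            for example in load_examples(path):   # load_examples is a placeholder callable
                yield example
        if mode != "train":
            break                                 # evaluation / inference: stop after one pass
```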
Default: LanguageModelingDataset + shuffle (bool): whether to shuffle data or not. Default: False. """ @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. input_ids: indices of tokens which constitute batches of text segments - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - input_mask: bool tensor with 0s in place of tokens to be masked - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - labels: indices of tokens which should be predicted from each of the corresponding tokens in input_ids; for left-to-right language modeling equals to input_ids shifted by 1 to the right - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) """ return { - # "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "labels": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), "input_ids": NeuralType(('B', 'T'), ChannelType()), "input_mask": NeuralType(('B', 'T'), ChannelType()), "labels": NeuralType(('B', 'T'), LabelsType()), } def __init__( - self, dataset, tokenizer, max_seq_length, batch_size, batch_step=128, dataset_type=LanguageModelingDataset + self, + dataset, + tokenizer, + max_seq_length, + batch_size, + batch_step=128, + dataset_type=LanguageModelingDataset, + shuffle=False, ): dataset_params = { 'dataset': dataset, @@ -72,4 +71,4 @@ def __init__( 'max_seq_length': max_seq_length, 'batch_step': batch_step, } - super().__init__(dataset_type, dataset_params, batch_size, shuffle=False) + super().__init__(dataset_type, dataset_params, batch_size, shuffle=shuffle) diff --git a/nemo/collections/nlp/nm/data_layers/machine_translation_datalayer.py b/nemo/collections/nlp/nm/data_layers/machine_translation_datalayer.py index 44f877f5dcc3..0ff83ae67b90 100644 --- a/nemo/collections/nlp/nm/data_layers/machine_translation_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/machine_translation_datalayer.py @@ -21,6 +21,7 @@ from nemo.collections.nlp.data import TranslationDataset from nemo.collections.nlp.nm.data_layers.text_datalayer import TextDataLayer from nemo.core import ChannelType, LabelsType, NeuralType +from nemo.utils.decorators import add_port_docs __all__ = ['TranslationDataLayer'] @@ -41,35 +42,27 @@ class TranslationDataLayer(TextDataLayer): pairs with big difference in sentences length, removing pairs with the same tokens in src and tgt, etc; useful for training data layer and should not be used in evaluation data layer + dataset_type (Dataset): + the underlying dataset. Default: TranslationDataset """ @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. 
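A sketch of the language-modeling data layer with the new `shuffle` argument; path and sizes are placeholders.

```python
# Illustrative sketch only: left-to-right language modeling data layer.
import nemo.collections.nlp as nemo_nlp
from nemo.collections.nlp.nm.data_layers import LanguageModelingDataLayer

tokenizer = nemo_nlp.data.tokenizers.NemoBertTokenizer(pretrained_model='bert-base-uncased')

lm_data = LanguageModelingDataLayer(
    dataset='/data/wikitext-2/train.txt',  # placeholder path
    tokenizer=tokenizer,
    max_seq_length=256,
    batch_size=32,
    batch_step=128,       # tokens skipped between successive text segments
    shuffle=True,         # new argument; shuffling was previously hard-coded off
)
```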
src_ids: indices of tokens which correspond to source sentences - src_mask: bool tensor with 0s in place of source tokens to be masked - tgt_ids: indices of tokens which correspond to target sentences - tgt_mask: bool tensor with 0s in place of target tokens to be masked - labels: indices of tokens which should be predicted from each of the corresponding target tokens in tgt_ids; for standard neural machine translation equals to tgt_ids shifted by 1 to the right - sent_ids: indices of the sentences in a batch; important for evaluation with external metrics, such as SacreBLEU """ return { - # "src_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "src_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "tgt_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "tgt_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "labels": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "sent_ids": NeuralType({0: AxisType(BatchTag)}), "src_ids": NeuralType(('B', 'T'), ChannelType()), "src_mask": NeuralType(('B', 'T'), ChannelType()), "tgt_ids": NeuralType(('B', 'T'), ChannelType()), diff --git a/nemo/collections/nlp/nm/data_layers/punctuation_capitalization_datalayer.py b/nemo/collections/nlp/nm/data_layers/punctuation_capitalization_datalayer.py index e3cfeda2235a..10e943682e5a 100644 --- a/nemo/collections/nlp/nm/data_layers/punctuation_capitalization_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/punctuation_capitalization_datalayer.py @@ -16,28 +16,72 @@ from nemo.collections.nlp.data import BertPunctuationCapitalizationDataset from nemo.collections.nlp.nm.data_layers.text_datalayer import TextDataLayer -from nemo.core import ChannelType, LabelsType, NeuralType +from nemo.core import ChannelType, LabelsType, MaskType, NeuralType +from nemo.utils.decorators import add_port_docs __all__ = ['PunctuationCapitalizationDataLayer'] class PunctuationCapitalizationDataLayer(TextDataLayer): + """ + Data layer for punctuation and capitalization. + + Args: + text_file (str): file to sequences, each line should a sentence, + No header. + label_file (str): file to labels, each line corresponds to + word labels for a sentence in the text_file. No header. + tokenizer (TokenizerSpec): text tokenizer. + max_seq_length (int): max sequence length minus 2 for [CLS] and [SEP] + pad_label (str): ad value use for labels. + by default, it's the neutral label. + punct_label_ids (dict): + dict to map labels to label ids. + Starts with pad_label->0 and then increases in alphabetical order + For dev set use label_ids generated during training to support + cases when not all labels are present in the dev set. + For training set label_ids should be None. + capit_label_ids (dict): + dict to map labels to label ids. + Starts with pad_label->0 and then increases in alphabetical order + For dev set use label_ids generated during training to support + cases when not all labels are present in the dev set. + For training set label_ids should be None. + num_samples (int): + number of samples you want to use for the dataset. + If -1, use all dataset. Useful for testing. + shuffle (bool): whether to shuffle your data. + batch_size (int): batch size + ignore_extra_tokens (bool): whether to ignore extra tokens in + the loss_mask + ignore_start_end (bool): + whether to ignore bos and eos tokens in the loss_mask + use_cache (bool): whether to use data cache + dataset_type (Dataset): Default BertPunctuationCapitalizationDataset. 
+ """ + @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. + input_ids: + indices of tokens which constitute batches of masked text segments + input_type_ids: + tensor with 0's and 1's to denote the text segment type + input_mask: + bool tensor with 0s in place of tokens to be masked + loss_mask: + used to mask and ignore tokens in the loss function: indices of tokens which constitute batches of unmasked text segments + subtokens_mask: + used to mask all but the first subtoken of the work, could be useful during inference + punct_labels: punctuation label ids + capit_labels: capit_labels label ids """ return { - # "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "loss_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "subtokens_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "punct_labels": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "capit_labels": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), "input_ids": NeuralType(('B', 'T'), ChannelType()), "input_type_ids": NeuralType(('B', 'T'), ChannelType()), "input_mask": NeuralType(('B', 'T'), ChannelType()), - "loss_mask": NeuralType(('B', 'T'), ChannelType()), + "loss_mask": NeuralType(('B', 'T'), MaskType()), "subtokens_mask": NeuralType(('B', 'T'), ChannelType()), "punct_labels": NeuralType(('B', 'T'), LabelsType()), "capit_labels": NeuralType(('B', 'T'), LabelsType()), @@ -66,7 +110,6 @@ def __init__( 'max_seq_length': max_seq_length, 'tokenizer': tokenizer, 'num_samples': num_samples, - 'shuffle': shuffle, 'pad_label': pad_label, 'punct_label_ids': punct_label_ids, 'capit_label_ids': capit_label_ids, @@ -74,4 +117,4 @@ def __init__( 'ignore_start_end': ignore_start_end, 'use_cache': use_cache, } - super().__init__(dataset_type, dataset_params, batch_size, shuffle) + super().__init__(dataset_type, dataset_params, batch_size, shuffle=shuffle) diff --git a/nemo/collections/nlp/nm/data_layers/qa_squad_datalayer.py b/nemo/collections/nlp/nm/data_layers/qa_squad_datalayer.py index 219632b375f5..2234e3c6f445 100644 --- a/nemo/collections/nlp/nm/data_layers/qa_squad_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/qa_squad_datalayer.py @@ -17,6 +17,7 @@ from nemo.collections.nlp.data import SquadDataset from nemo.collections.nlp.nm.data_layers.text_datalayer import TextDataLayer from nemo.core import ChannelType, LabelsType, NeuralType +from nemo.utils.decorators import add_port_docs __all__ = ['BertQuestionAnsweringDataLayer'] @@ -32,36 +33,40 @@ class BertQuestionAnsweringDataLayer(TextDataLayer): unanswerable questions. doc_stride (int): When splitting up a long document into chunks, how much stride to take between chunks. - max_query_length (iny): All training files which have a duration less + max_query_length (int): All training files which have a duration less than min_duration are dropped. Can't be used if the `utt2dur` file does not exist. Defaults to None. max_seq_length (int): All training files which have a duration more than max_duration are dropped. Can't be used if the `utt2dur` file does not exist. Defaults to None. - mode (str): Use "train" or "dev" to define between - training and evaluation. + mode (str): Use "train", "eval", or "test" to define between + training and evaluation and inference. 
batch_size (int): Batch size. Defaults to 64. - dataset_type (class): Question Answering class. + dataset_type (Dataset): Question Answering class. Defaults to SquadDataset. """ @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. + input_ids: + indices of tokens which constitute batches of masked text segments + input_type_ids: + tensor with 0's and 1's to denote the text segment type + input_mask: + bool tensor with 0s in place of tokens to be masked + start_positions: indices of tokens which constitute start position of answer + end_positions: indices of tokens which constitute end position of answer + unique_ids: id of the Question answer example this instance belongs to """ return { - # "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "start_positions": NeuralType({0: AxisType(BatchTag)}), - # "end_positions": NeuralType({0: AxisType(BatchTag)}), - # "unique_ids": NeuralType({0: AxisType(BatchTag)}), "input_ids": NeuralType(('B', 'T'), ChannelType()), "input_type_ids": NeuralType(('B', 'T'), ChannelType()), "input_mask": NeuralType(('B', 'T'), ChannelType()), - "start_positions": NeuralType(tuple('B'), ChannelType()), - "end_positions": NeuralType(tuple('B'), ChannelType()), "unique_ids": NeuralType(tuple('B'), ChannelType()), + "start_positions": NeuralType(tuple('B'), ChannelType(), optional=True), + "end_positions": NeuralType(tuple('B'), ChannelType(), optional=True), } def __init__( @@ -72,8 +77,10 @@ def __init__( doc_stride, max_query_length, max_seq_length, - mode="train", + mode, batch_size=64, + use_cache=True, + shuffle=False, dataset_type=SquadDataset, ): dataset_params = { @@ -83,7 +90,8 @@ def __init__( 'version_2_with_negative': version_2_with_negative, 'max_query_length': max_query_length, 'max_seq_length': max_seq_length, + 'use_cache': use_cache, 'doc_stride': doc_stride, } - super().__init__(dataset_type, dataset_params, batch_size, shuffle=False) + super().__init__(dataset_type, dataset_params, batch_size, shuffle=shuffle) diff --git a/nemo/collections/nlp/nm/data_layers/state_tracking_sgd_datalayer.py b/nemo/collections/nlp/nm/data_layers/state_tracking_sgd_datalayer.py new file mode 100644 index 000000000000..d5f76d0ed65b --- /dev/null +++ b/nemo/collections/nlp/nm/data_layers/state_tracking_sgd_datalayer.py @@ -0,0 +1,121 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# Copyright 2019 The Google Research Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
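With this change `mode` becomes a required argument ('train', 'eval', or 'test') and the start/end position ports are optional, so the same layer can drive inference. A hedged sketch; the data file path is a placeholder and argument names follow the dataset parameters assembled above.

```python
# Illustrative sketch only: SQuAD data layer in evaluation mode.
import nemo.collections.nlp as nemo_nlp
from nemo.collections.nlp.nm.data_layers import BertQuestionAnsweringDataLayer

tokenizer = nemo_nlp.data.tokenizers.NemoBertTokenizer(pretrained_model='bert-base-uncased')

eval_data = BertQuestionAnsweringDataLayer(
    data_file='/data/squad/v1.1/dev-v1.1.json',  # placeholder path
    tokenizer=tokenizer,
    version_2_with_negative=False,
    doc_stride=128,
    max_query_length=64,
    max_seq_length=384,
    mode='eval',           # now required: 'train', 'eval', or 'test'
    batch_size=24,
    use_cache=True,        # new argument for dataset caching
)
```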
+# ============================================================================= + +from nemo.backends.pytorch import DataLayerNM +from nemo.collections.nlp.data.datasets.sgd_dataset.sgd_dataset import SGDDataset +from nemo.core.neural_types import ChannelType, LabelsType, LengthsType, NeuralType +from nemo.utils.decorators import add_port_docs + +__all__ = ['SGDDataLayer'] + + +class SGDDataLayer(DataLayerNM): + """ + Data layer for Schema Guided Dialogue State Tracking Dataset. + Args: + dataset_split (str): train/ dev/ test, + dialogues_processor (obj): containt dialogue data, + dataset_type (Dataset): Dataset Type, + shuffle (bool): enables shuffling, default=False + num_workers (int): number of workers + batch_size (int): batch size + pin_memory (bool): enables copying Tensors into CUDA pinned memory before returning them + """ + + @property + @add_port_docs() + def output_ports(self): + """Returns definitions of module output ports. + example_id_num (int): example ids + service_id (int): service ids + is_real_example (bool): flag to determine is the example is valid + utterance_ids (int): utterance ids + utterance_segment (int): Denotes the identity of the sequence. Takes values 0 (system utterance) and 1 (user utterance) + utterance_mask (int): Mask which takes the value 0 for padded tokens and 1 otherwise + categorical_slot_status (int): The status of each categorical slot in the service + cat_slot_status_mask(int): Masks out categorical status for padded cat slots, takes values 0 and 1 + categorical_slot_values (int): The index of the correct value for each categorical slot + cat_slot_values_mask (int): Masks out categorical slots values for slots not used in the service, takes values 0 and 1 + noncategorical_slot_status (int): The status of each non-categorical slot in the service + noncat_slot_status_mask(int): Masks out non-categorical status for padded cat slots, takes values 0 and 1 + noncategorical_slot_value_start (int): The index of the starting subword corresponding to the slot span for a non-categorical slot value + noncategorical_slot_value_end (int): The index of the ending (inclusive) subword corresponding to the slot span for a non-categorical slot value + start_char_idx (int): Start character indices in the original utterance corresponding to the tokens + end_char_idx (int): Inclusive end character indices in the original utterance corresponding to the tokens + num_slots (int): Total number of slots present in the service + requested_slot_status (int): Takes value 1 if the corresponding slot is requested, 0 otherwise + req_slot_mask (int): Masks requested slots not used for the particular service + intent_status_mask (long): Masks out padded intents in the service, takes values 0 and 1 + intent_status_labels (int): Intent labels + + """ + return { + "example_id_num": NeuralType(('B'), ChannelType()), + "service_id": NeuralType(('B'), ChannelType()), + "is_real_example": NeuralType(('B'), ChannelType()), + "utterance_ids": NeuralType(('B', 'T'), ChannelType()), + "utterance_segment": NeuralType(('B', 'T'), ChannelType()), + "utterance_mask": NeuralType(('B', 'T'), ChannelType()), + "categorical_slot_status": NeuralType(('B', 'T'), LabelsType()), + "cat_slot_status_mask": NeuralType(('B', 'T'), ChannelType()), + "categorical_slot_values": NeuralType(('B', 'T'), LabelsType()), + "cat_slot_values_mask": NeuralType(('B', 'T', 'C'), ChannelType()), + "noncategorical_slot_status": NeuralType(('B', 'T'), LabelsType()), + "noncat_slot_status_mask": NeuralType(('B', 'T'), 
ChannelType()), + "noncategorical_slot_value_start": NeuralType(('B', 'T'), LabelsType()), + "noncategorical_slot_value_end": NeuralType(('B', 'T'), LabelsType()), + "start_char_idx": NeuralType(('B', 'T'), LabelsType()), + "end_char_idx": NeuralType(('B', 'T'), LabelsType()), + "num_slots": NeuralType(('B'), LengthsType()), + "requested_slot_status": NeuralType(('B', 'T'), LabelsType()), + "req_slot_mask": NeuralType(('B', 'T'), ChannelType()), + "intent_status_mask": NeuralType(('B', 'T'), ChannelType()), + "intent_status_labels": NeuralType(('B'), LabelsType()), + } + + def __init__( + self, + dataset_split, + dialogues_processor, + dataset_type=SGDDataset, + shuffle=False, + batch_size=1, + num_workers=-1, + pin_memory=False, + ): + super().__init__() + dataset_params = { + 'dataset_split': dataset_split, + 'dialogues_processor': dialogues_processor, + } + self._dataset = dataset_type(**dataset_params) + self._batch_size = batch_size + self._shuffle = shuffle + self._pin_memory = pin_memory + if num_workers >= 0: + self._num_workers = num_workers + + def __len__(self): + return len(self._dataset) + + @property + def dataset(self): + return self._dataset + + @property + def data_iterator(self): + return None diff --git a/nemo/collections/nlp/nm/data_layers/state_tracking_trade_datalayer.py b/nemo/collections/nlp/nm/data_layers/state_tracking_trade_datalayer.py index 2b7e3800928a..15de8b1e7817 100644 --- a/nemo/collections/nlp/nm/data_layers/state_tracking_trade_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/state_tracking_trade_datalayer.py @@ -44,36 +44,45 @@ from nemo.collections.nlp.data.datasets import MultiWOZDataset from nemo.collections.nlp.nm.data_layers.text_datalayer import TextDataLayer from nemo.core.neural_types import ChannelType, LabelsType, LengthsType, NeuralType +from nemo.utils.decorators import add_port_docs __all__ = ['MultiWOZDataLayer'] class MultiWOZDataLayer(TextDataLayer): + """ + Creates the data layer to use for State Tracking dataset MultiWOZ. + + Args: + data_dir (str): path of the data folder + domains (dict): dictionary of the domains to include + all_domains (dict): dictionary of all the available domains + vocab (Vocab): Vocabulary + slots (list): list of the slots + gating_dict (dict): dictionary of the gates + num_samples (int): number of samples to include + batch_size (int): batch size + mode (str): mode of dataset, default='train' + dataset_type (Dataset): Dataset Type + shuffle (bool): enables shuffling, default=False + num_workers (int): number of workers + input_dropout (float): dropout applied to the input + is_training (bool): specifies if it is for training + """ + @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. 
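A minimal hedged sketch of the new Schema-Guided Dialogue data layer; the dialogues processor is a placeholder that would normally be built from the SGD data files, and the remaining keyword arguments follow the constructor shown above.

```python
# Illustrative sketch only: data layer for Schema-Guided Dialogue state tracking.
from nemo.collections.nlp.nm.data_layers import SGDDataLayer

dialogues_processor = None  # replace with the SGD dialogues processor holding the dialogue data

train_data = SGDDataLayer(
    dataset_split='train',
    dialogues_processor=dialogues_processor,
    batch_size=32,
    shuffle=True,
    num_workers=4,      # honored only when >= 0
    pin_memory=True,
)
```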
- src_ids: ids of input sequences - src_lens: lengths of input sequences - tgt_ids: labels for the generator output - tgt_lens: lengths of the generator targets - gating_labels: labels for the gating head - turn_domain: list of the domains - NeuralType(None) """ return { - # "src_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "src_lens": NeuralType({0: AxisType(BatchTag)}), - # "tgt_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(TimeTag)}), - # "tgt_lens": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), - # "gating_labels": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), - # "turn_domain": NeuralType(None), "src_ids": NeuralType(('B', 'T'), ChannelType()), "src_lens": NeuralType(tuple('B'), LengthsType()), "tgt_ids": NeuralType(('B', 'D', 'T'), LabelsType()), diff --git a/nemo/collections/nlp/nm/data_layers/text_classification_datalayer.py b/nemo/collections/nlp/nm/data_layers/text_classification_datalayer.py index a104a5a543f5..4aceabd54f60 100644 --- a/nemo/collections/nlp/nm/data_layers/text_classification_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/text_classification_datalayer.py @@ -14,34 +14,47 @@ # limitations under the License. # ============================================================================= -from nemo.collections.nlp.data import BertTextClassificationDataset +from nemo.collections.nlp.data.datasets.text_classification.text_classification_dataset import ( + BertTextClassificationDataset, +) from nemo.collections.nlp.nm.data_layers.text_datalayer import TextDataLayer from nemo.core import ChannelType, LabelsType, NeuralType +from nemo.utils.decorators import add_port_docs -__all__ = ['BertSentenceClassificationDataLayer'] +__all__ = ['BertTextClassificationDataLayer'] -class BertSentenceClassificationDataLayer(TextDataLayer): +class BertTextClassificationDataLayer(TextDataLayer): """ Creates the data layer to use for the task of sentence classification with pretrained model. - All the data processing is done BertSentenceClassificationDataset. + All the data processing is done BertTextClassificationDataset. Args: + input_file (str): data file + tokenizer (TokenizerSpec): text tokenizer. + max_seq_length (int): max sequence length minus 2 for [CLS] and [SEP] + num_samples (int): number of samples to load. default is -1 which means all samples. + shuffle (bool): whether to shuffle data or not. Default: False. + batch_size: text segments batch size dataset (BertTextClassificationDataset): the dataset that needs to be converted to DataLayerNM """ @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. 
+ input_ids: + indices of tokens which constitute batches of masked text segments + input_type_ids: + tensor with 0's and 1's to denote the text segment type + input_mask: + bool tensor with 0s in place of tokens to be masked + labels: sequence classification id """ return { - # "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "labels": NeuralType({0: AxisType(BatchTag)}), "input_ids": NeuralType(('B', 'T'), ChannelType()), "input_type_ids": NeuralType(('B', 'T'), ChannelType()), "input_mask": NeuralType(('B', 'T'), ChannelType()), @@ -56,6 +69,7 @@ def __init__( num_samples=-1, shuffle=False, batch_size=64, + use_cache=False, dataset_type=BertTextClassificationDataset, ): dataset_params = { @@ -63,6 +77,7 @@ def __init__( 'tokenizer': tokenizer, 'max_seq_length': max_seq_length, 'num_samples': num_samples, + 'use_cache': use_cache, 'shuffle': shuffle, } - super().__init__(dataset_type, dataset_params, batch_size, shuffle) + super().__init__(dataset_type, dataset_params, batch_size, shuffle=shuffle) diff --git a/nemo/collections/nlp/nm/data_layers/text_datalayer.py b/nemo/collections/nlp/nm/data_layers/text_datalayer.py index 1b02cb4c1f16..0013fc97e9a4 100644 --- a/nemo/collections/nlp/nm/data_layers/text_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/text_datalayer.py @@ -25,15 +25,20 @@ class TextDataLayer(DataLayerNM): Generic Text Data Layer NM which wraps PyTorch's dataset Args: - dataset_type: type of dataset used for this datalayer + dataset_type (Dataset): type of dataset used for this datalayer dataset_params (dict): all the params for the dataset + batch_size (int): sequence batch size + shuffle (bool): whether to shuffle data """ - def __init__(self, dataset_type, dataset_params, batch_size, shuffle=False): + def __init__(self, dataset_type, dataset_params, batch_size, shuffle=False, num_workers=-1, pin_memory=False): super().__init__() self._dataset = dataset_type(**dataset_params) self._batch_size = batch_size self._shuffle = shuffle + self._pin_memory = pin_memory + if num_workers >= 0: + self._num_workers = num_workers def __len__(self): return len(self._dataset) diff --git a/nemo/collections/nlp/nm/data_layers/token_classification_datalayer.py b/nemo/collections/nlp/nm/data_layers/token_classification_datalayer.py index 5fd6cbe2ee5b..3cd1256ef54a 100644 --- a/nemo/collections/nlp/nm/data_layers/token_classification_datalayer.py +++ b/nemo/collections/nlp/nm/data_layers/token_classification_datalayer.py @@ -16,27 +16,70 @@ from nemo.collections.nlp.data import BertTokenClassificationDataset, BertTokenClassificationInferDataset from nemo.collections.nlp.nm.data_layers.text_datalayer import TextDataLayer -from nemo.core import ChannelType, LabelsType, NeuralType +from nemo.core import ChannelType, LabelsType, MaskType, NeuralType +from nemo.utils.decorators import add_port_docs __all__ = ['BertTokenClassificationDataLayer', 'BertTokenClassificationInferDataLayer'] class BertTokenClassificationDataLayer(TextDataLayer): + """ + Creates the data layer to use for the task of token classification + with pretrained model. + + All the data processing is done BertTokenClassificationDataset. + text_file (str): + file to sequences, each line should a sentence, + No header. + label_file (str): + file to labels, each line corresponds to word labels for a sentence in the text_file. No header. 
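The sentence-classification layer is renamed to `BertTextClassificationDataLayer` and gains a `use_cache` flag, while the shared `TextDataLayer` base now also accepts `num_workers` and `pin_memory`. A hedged sketch of the renamed layer; the path and sizes are placeholders.

```python
# Illustrative sketch only: text classification data layer after the rename.
import nemo.collections.nlp as nemo_nlp
from nemo.collections.nlp.nm.data_layers import BertTextClassificationDataLayer

tokenizer = nemo_nlp.data.tokenizers.NemoBertTokenizer(pretrained_model='bert-base-uncased')

train_data = BertTextClassificationDataLayer(
    input_file='/data/sst2/train.tsv',  # placeholder path
    tokenizer=tokenizer,
    max_seq_length=128,
    num_samples=-1,       # -1 keeps every example
    shuffle=True,
    batch_size=64,
    use_cache=True,       # new argument enabling dataset caching
)
```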
+ pad_label (int): + d value use for labels. + by default, it's the neutral label. + tokenizer (TokenizerSpec): text tokenizer. + max_seq_length (int): + max sequence length minus 2 for [CLS] and [SEP] + label_ids: + dict to map labels to label ids. + Starts with pad_label->0 and then increases in alphabetical order + For dev set use label_ids generated during training to support + cases when not all labels are present in the dev set. + For training set label_ids should be None. + num_samples (int): + number of samples you want to use for the dataset. + If -1, use all dataset. Useful for testing. + shuffle (bool): whether to shuffle data or not. Default: False. + batch_size (int): text segments batch size + ignore_extra_tokens (bool): whether or not to ignore extra tokens + ignore_start_end (bool): whether or not to ignore start and end + use_cache: + whether to use data cache + dataset_type (BertTokenClassificationDataset): + the dataset that needs to be converted to DataLayerNM + """ + @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. + input_ids: + indices of tokens which constitute batches of text segments + input_type_ids: + tensor with 0's and 1's to denote the text segment type + input_mask: + bool tensor with 0s in place of tokens to be masked + loss_mask: + used to mask and ignore tokens in the loss function + subtokens_mask: + used to mask all but the first subtoken of the work, could be useful during inference + labels: + token target ids """ return { - # "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "loss_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "subtokens_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "labels": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), "input_ids": NeuralType(('B', 'T'), ChannelType()), "input_type_ids": NeuralType(('B', 'T'), ChannelType()), "input_mask": NeuralType(('B', 'T'), ChannelType()), - "loss_mask": NeuralType(('B', 'T'), ChannelType()), + "loss_mask": NeuralType(('B', 'T'), MaskType()), "subtokens_mask": NeuralType(('B', 'T'), ChannelType()), "labels": NeuralType(('B', 'T'), LabelsType()), } @@ -63,7 +106,6 @@ def __init__( 'max_seq_length': max_seq_length, 'tokenizer': tokenizer, 'num_samples': num_samples, - 'shuffle': shuffle, 'pad_label': pad_label, 'label_ids': label_ids, 'ignore_extra_tokens': ignore_extra_tokens, @@ -74,16 +116,25 @@ def __init__( class BertTokenClassificationInferDataLayer(TextDataLayer): + """ + All the data processing is done BertTokenClassificationInferDataset. + queries: + (list of str): quiries to run inference on + tokenizer (TokenizerSpec): text tokenizer. + max_seq_length (int): + max sequence length minus 2 for [CLS] and [SEP] + shuffle (bool): whether to shuffle data or not. Default: False. + batch_size: text segments batch size + dataset_type (BertTokenClassificationInferDataset): + the dataset that needs to be converted to DataLayerNM + """ + @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. 
""" return { - # "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "input_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "input_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "loss_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "subtokens_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), "input_ids": NeuralType(('B', 'T'), ChannelType()), "input_type_ids": NeuralType(('B', 'T'), ChannelType()), "input_mask": NeuralType(('B', 'T'), ChannelType()), @@ -92,7 +143,13 @@ def output_ports(self): } def __init__( - self, queries, tokenizer, max_seq_length, batch_size=1, dataset_type=BertTokenClassificationInferDataset, + self, + queries, + tokenizer, + max_seq_length, + batch_size=1, + shuffle=False, + dataset_type=BertTokenClassificationInferDataset, ): dataset_params = {'queries': queries, 'tokenizer': tokenizer, 'max_seq_length': max_seq_length} - super().__init__(dataset_type, dataset_params, batch_size, shuffle=False) + super().__init__(dataset_type, dataset_params, batch_size, shuffle=shuffle) diff --git a/nemo/collections/nlp/nm/losses/__init__.py b/nemo/collections/nlp/nm/losses/__init__.py index 11c24cdefa6b..357839adb61a 100644 --- a/nemo/collections/nlp/nm/losses/__init__.py +++ b/nemo/collections/nlp/nm/losses/__init__.py @@ -14,11 +14,7 @@ # limitations under the License. # ============================================================================= -from nemo.collections.nlp.nm.losses.aggregator_loss import * -from nemo.collections.nlp.nm.losses.joint_intent_slot_loss import * -from nemo.collections.nlp.nm.losses.masked_language_modeling_loss import * -from nemo.collections.nlp.nm.losses.padded_smoothed_cross_entropy_loss import * -from nemo.collections.nlp.nm.losses.qa_squad_loss import * +from nemo.collections.nlp.nm.losses.masked_xentropy_loss import * +from nemo.collections.nlp.nm.losses.sgd_loss import * from nemo.collections.nlp.nm.losses.smoothed_cross_entropy_loss import * -from nemo.collections.nlp.nm.losses.state_tracking_trade_loss import * -from nemo.collections.nlp.nm.losses.token_classification_loss import * +from nemo.collections.nlp.nm.losses.spanning_loss import * diff --git a/nemo/collections/nlp/nm/losses/aggregator_loss.py b/nemo/collections/nlp/nm/losses/aggregator_loss.py deleted file mode 100644 index b1681c7048cb..000000000000 --- a/nemo/collections/nlp/nm/losses/aggregator_loss.py +++ /dev/null @@ -1,61 +0,0 @@ -# ============================================================================= -# Copyright 2020 NVIDIA. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================= - -from nemo.backends.pytorch import LossNM -from nemo.core import LossType, NeuralType - -__all__ = ['LossAggregatorNM'] - - -class LossAggregatorNM(LossNM): - """ - Neural module which combines sums several losses into one. 
- - Args: - num_inputs (int): number of input losses - """ - - @property - def input_ports(self): - """Returns definitions of module input ports. - - """ - input_ports = {} - for i in range(self.num_losses): - input_ports["loss_" + str(i + 1)] = NeuralType() - - return input_ports - - @property - def output_ports(self): - """Returns definitions of module output ports. - - loss: - NeuralType(None) - """ - return {"loss": NeuralType(elements_type=LossType())} - - def __init__(self, num_inputs=2): - # Store number of inputs/losses. - self.num_losses = num_inputs - LossNM.__init__(self) - - def _loss_function(self, **kwargs): - values = [kwargs[x] for x in sorted(kwargs.keys())] - loss = values[0] - for loss_i in values[1:]: - loss = loss.add(loss_i) - return loss diff --git a/nemo/collections/nlp/nm/losses/joint_intent_slot_loss.py b/nemo/collections/nlp/nm/losses/joint_intent_slot_loss.py deleted file mode 100644 index ce73176747d7..000000000000 --- a/nemo/collections/nlp/nm/losses/joint_intent_slot_loss.py +++ /dev/null @@ -1,109 +0,0 @@ -# ============================================================================= -# Copyright 2020 NVIDIA. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================= - -import torch -from torch import nn - -from nemo.backends.pytorch import LossNM -from nemo.core import ChannelType, LogitsType, LossType, NeuralType - -__all__ = ['JointIntentSlotLoss'] - - -class JointIntentSlotLoss(LossNM): - """ - Loss function for the joint intent classification and slot - filling task. - - The loss is a joint loss of both tasks, aim to maximize: - p(y^i | x)P(y^s1, y^s2, ..., y^sn | x) - - with y^i being the predicted intent and y^s1, y^s2, ..., y^sn - are the predicted slots corresponding to x1, x2, ..., xn. - - Args: - hidden_states: output of the hidden layers - intents: ground truth intents, - slots: ground truth slots. - input_mask: to differentiate from original tokens and paddings - intent_loss_weight: the loss is the sum of: - intent_loss_weight * intent_loss + - (1 - intent_loss_weight) * slot_loss - - """ - - @property - def input_ports(self): - """Returns definitions of module input ports. - - """ - return { - # "intent_logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), - # "slot_logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), - # "loss_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "intents": NeuralType({0: AxisType(BatchTag)}), - # "slots": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "intent_logits": NeuralType(('B', 'D'), LogitsType()), - "slot_logits": NeuralType(('B', 'T', 'D'), LogitsType()), - "loss_mask": NeuralType(('B', 'T'), ChannelType()), - "intents": NeuralType(tuple('B'), ChannelType()), - "slots": NeuralType(('B', 'T'), ChannelType()), - } - - @property - def output_ports(self): - """Returns definitions of module output ports. 
- - loss: - NeuralType(None) - """ - # return {"loss": NeuralType(None)} - return {"loss": NeuralType(elements_type=LossType())} - - def __init__( - self, num_slots, slot_classes_loss_weights=None, intent_classes_loss_weights=None, intent_loss_weight=0.6, - ): - LossNM.__init__(self) - self.num_slots = num_slots - self.intent_loss_weight = intent_loss_weight - self.slot_classes_loss_weights = slot_classes_loss_weights - self.intent_classes_loss_weights = intent_classes_loss_weights - - # For weighted loss to tackle class imbalance - if slot_classes_loss_weights: - self.slot_classes_loss_weights = torch.FloatTensor(slot_classes_loss_weights).to(self._device) - - if intent_classes_loss_weights: - self.intent_classes_loss_weights = torch.FloatTensor(intent_classes_loss_weights).to(self._device) - - self._criterion_intent = nn.CrossEntropyLoss(weight=self.intent_classes_loss_weights) - self._criterion_slot = nn.CrossEntropyLoss(weight=self.slot_classes_loss_weights) - - def _loss_function(self, intent_logits, slot_logits, loss_mask, intents, slots): - intent_loss = self._criterion_intent(intent_logits, intents) - - active_loss = loss_mask.view(-1) > 0.5 - active_logits = slot_logits.view(-1, self.num_slots)[active_loss] - active_labels = slots.view(-1)[active_loss] - - # To support empty active_labels - if len(active_labels) == 0: - slot_loss = 0.0 - else: - slot_loss = self._criterion_slot(active_logits, active_labels) - loss = intent_loss * self.intent_loss_weight + slot_loss * (1 - self.intent_loss_weight) - - return loss diff --git a/nemo/collections/nlp/nm/losses/masked_language_modeling_loss.py b/nemo/collections/nlp/nm/losses/masked_language_modeling_loss.py deleted file mode 100644 index 38f5169bf348..000000000000 --- a/nemo/collections/nlp/nm/losses/masked_language_modeling_loss.py +++ /dev/null @@ -1,60 +0,0 @@ -# ============================================================================= -# Copyright 2020 NVIDIA. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================= - -from nemo.backends.pytorch import LossNM -from nemo.collections.nlp.nm.losses.smoothed_cross_entropy_loss import SmoothedCrossEntropyLoss -from nemo.core import ChannelType, LogitsType, LossType, NeuralType - -__all__ = ['MaskedLanguageModelingLossNM'] - - -class MaskedLanguageModelingLossNM(LossNM): - """ - Neural module which implements Masked Language Modeling (MLM) loss. - - Args: - label_smoothing (float): label smoothing regularization coefficient - """ - - @property - def input_ports(self): - """Returns definitions of module input ports. 
- """ - return { - # "logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), - # "output_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "output_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "logits": NeuralType(('B', 'T', 'D'), LogitsType()), - "output_ids": NeuralType(('B', 'T'), ChannelType()), - "output_mask": NeuralType(('B', 'T'), ChannelType()), - } - - @property - def output_ports(self): - """Returns definitions of module output ports. - - loss: - NeuralType(None) - """ - return {"loss": NeuralType(elements_type=LossType())} - - def __init__(self, label_smoothing=0.0): - LossNM.__init__(self) - self._criterion = SmoothedCrossEntropyLoss(label_smoothing) - - def _loss_function(self, logits, output_ids, output_mask): - loss = self._criterion(logits, output_ids, output_mask) - return loss diff --git a/nemo/collections/nlp/nm/losses/state_tracking_trade_loss.py b/nemo/collections/nlp/nm/losses/masked_xentropy_loss.py similarity index 54% rename from nemo/collections/nlp/nm/losses/state_tracking_trade_loss.py rename to nemo/collections/nlp/nm/losses/masked_xentropy_loss.py index aa67439b9262..6d876ea752d7 100644 --- a/nemo/collections/nlp/nm/losses/state_tracking_trade_loss.py +++ b/nemo/collections/nlp/nm/losses/masked_xentropy_loss.py @@ -39,46 +39,44 @@ import torch from nemo.backends.pytorch.nm import LossNM -from nemo.core.neural_types import ChannelType, LabelsType, LengthsType, LogitsType, LossType, NeuralType +from nemo.core.neural_types import LabelsType, LengthsType, LogitsType, LossType, NeuralType +from nemo.utils.decorators import add_port_docs -__all__ = ['TRADEMaskedCrossEntropy', 'CrossEntropyLoss3D'] +__all__ = ['MaskedLogLoss'] -class TRADEMaskedCrossEntropy(LossNM): +class MaskedLogLoss(LossNM): """ - Neural module which implements a cross entropy for trade model with masking feature. + Neural module which implements a cross entropy model with masking feature. It keeps just the target logit for cross entropy calculation Args: logits (float): output of the classifier - targets (long): ground truth targets + labels (long): ground truth targets loss_mask (long): specifies the ones to get ignored in loss calculation """ @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. logits: 4d tensor of logits - targets: 3d tensor of labels + labels: 3d tensor of labels loss_mask: specifies the words to be considered in the loss calculation """ return { - # "logits": NeuralType( - # {0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag), 3: AxisType(ChannelTag)} - # ), - # "targets": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(TimeTag)}), - # "loss_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), "logits": NeuralType(('B', 'T', 'D', 'D'), LogitsType()), - "targets": NeuralType(('B', 'D', 'T'), LabelsType()), - "loss_mask": NeuralType(('B', 'D'), LengthsType()), + "labels": NeuralType(('B', 'D', 'T'), LabelsType()), + "length_mask": NeuralType(('B', 'D'), LengthsType()), } @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. 
""" @@ -88,63 +86,21 @@ def output_ports(self): def __init__(self): LossNM.__init__(self) - def _loss_function(self, logits, targets, loss_mask): + def _loss_function(self, logits, labels, length_mask, eps=1e-10): logits_flat = logits.view(-1, logits.size(-1)) - eps = 1e-10 log_probs_flat = torch.log(torch.clamp(logits_flat, min=eps)) - target_flat = targets.view(-1, 1) - losses_flat = -torch.gather(log_probs_flat, dim=1, index=target_flat) - losses = losses_flat.view(*targets.size()) - loss = self.masking(losses, loss_mask) + labels_flat = labels.view(-1, 1) + losses_flat = -torch.gather(log_probs_flat, dim=1, index=labels_flat) + losses = losses_flat.view(*labels.size()) + loss = self.masking(losses, length_mask) return loss @staticmethod - def masking(losses, mask): + def masking(losses, length_mask): max_len = losses.size(2) - mask_ = torch.arange(max_len, device=mask.device)[None, None, :] < mask[:, :, None] + mask_ = torch.arange(max_len, device=length_mask.device)[None, None, :] < length_mask[:, :, None] mask_ = mask_.float() losses = losses * mask_ loss = losses.sum() / mask_.sum() return loss - - -class CrossEntropyLoss3D(LossNM): - """ - Neural module which implements a cross entropy loss for 3d logits. - Args: - num_classes (int): number of classes in a classifier, e.g. size - of the vocabulary in language modeling objective - logits (float): output of the classifier - labels (long): ground truth labels - """ - - @property - def input_ports(self): - """Returns definitions of module input ports. - """ - return { - # "logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(ChannelTag)}), - # "labels": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), - "logits": NeuralType(('B', 'D', 'D'), LogitsType()), - "labels": NeuralType(('B', 'D'), LabelsType()), - } - - @property - def output_ports(self): - """Returns definitions of module output ports. - """ - # return {"loss": NeuralType(None)} - return {"loss": NeuralType(elements_type=LossType())} - - def __init__(self, num_classes, **kwargs): - LossNM.__init__(self, **kwargs) - self._criterion = torch.nn.CrossEntropyLoss() - self.num_classes = num_classes - - def _loss_function(self, logits, labels): - logits_flatten = logits.view(-1, self.num_classes) - labels_flatten = labels.view(-1) - - loss = self._criterion(logits_flatten, labels_flatten) - return loss diff --git a/nemo/collections/nlp/nm/losses/padded_smoothed_cross_entropy_loss.py b/nemo/collections/nlp/nm/losses/padded_smoothed_cross_entropy_loss.py deleted file mode 100644 index 1564f43c40b0..000000000000 --- a/nemo/collections/nlp/nm/losses/padded_smoothed_cross_entropy_loss.py +++ /dev/null @@ -1,65 +0,0 @@ -# ============================================================================= -# Copyright 2020 NVIDIA. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================= - -from nemo.backends.pytorch import LossNM -from nemo.collections.nlp.nm.losses.smoothed_cross_entropy_loss import SmoothedCrossEntropyLoss -from nemo.collections.nlp.utils.common_nlp_utils import mask_padded_tokens -from nemo.core import LabelsType, LogitsType, LossType, NeuralType - -__all__ = ['PaddedSmoothedCrossEntropyLossNM'] - - -class PaddedSmoothedCrossEntropyLossNM(LossNM): - """ - Neural module which calculates CrossEntropyLoss and - 1) excludes padding tokens from loss calculation - 2) allows to use label smoothing regularization - 3) allows to calculate loss for the desired number of last tokens - - Args: - label_smoothing (float): label smoothing regularization coefficient - predict_last_k (int): how many last tokens to use for the loss - calculation, important for fast evaluation of LM perplexity - """ - - @property - def input_ports(self): - """Returns definitions of module input ports. - """ - return { - # "logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), - # "target_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "logits": NeuralType(('B', 'T', 'D'), LogitsType()), - "target_ids": NeuralType(('B', 'T'), LabelsType()), - } - - @property - def output_ports(self): - """Returns definitions of module output ports. - """ - # return {"loss": NeuralType(None)} - return {"loss": NeuralType(elements_type=LossType())} - - def __init__(self, pad_id, label_smoothing=0, predict_last_k=0): - LossNM.__init__(self) - - self._loss_fn = SmoothedCrossEntropyLoss(label_smoothing, predict_last_k) - self._pad_id = pad_id - - def _loss_function(self, logits, target_ids): - target_mask = mask_padded_tokens(target_ids, self._pad_id).to(logits.dtype) - loss = self._loss_fn(logits, target_ids, target_mask) - return loss diff --git a/nemo/collections/nlp/nm/losses/sgd_loss.py b/nemo/collections/nlp/nm/losses/sgd_loss.py new file mode 100644 index 000000000000..74d7fc437512 --- /dev/null +++ b/nemo/collections/nlp/nm/losses/sgd_loss.py @@ -0,0 +1,215 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# Copyright 2019 The Google Research Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================= + +''' +This file contains code artifacts adapted from the original implementation: +https://github.com/google-research/google-research/blob/master/schema_guided_dst/baseline/train_and_predict.py +''' + +import torch + +from nemo import logging +from nemo.backends.pytorch import LossNM +from nemo.collections.nlp.data.datasets.sgd_dataset.input_example import STATUS_ACTIVE +from nemo.core import ChannelType, LabelsType, LogitsType, NeuralType +from nemo.utils.decorators import add_port_docs + +__all__ = ['SGDDialogueStateLossNM'] + + +class SGDDialogueStateLossNM(LossNM): + """ + Neural module which implements loss for SGD model. + """ + + @property + @add_port_docs + def input_ports(self): + """Returns definitions of module input ports. + logit_intent_status (float): Output of SGD model + intent_status_labels (int): Intent labels + logit_req_slot_status (float): Output of SGD model + requested_slot_status (float): Takes value 1 if the corresponding slot is requested, 0 otherwise + req_slot_mask (bool): Masks requested slots not used for the particular service + logit_cat_slot_status (float): Output of SGD model + categorical_slot_status (int): The status of each categorical slot in the service + cat_slot_status_mask (bool): Masks categorical slots not used for the particular service + logit_cat_slot_value (float): Output of SGD model + categorical_slot_values (int): The index of the correct value for each categorical slot + logit_noncat_slot_status (float): Output of SGD model + noncategorical_slot_status (int): The status of each noncategorical slot in the service + noncat_slot_status_mask (bool): masks noncategorical slots not used for the particular service + logit_noncat_slot_start (float): Output of SGD model + logit_noncat_slot_end (float): Output of SGD model + noncategorical_slot_value_start (int): The index of the starting subword corresponding to the slot span for a non-categorical slot value + noncategorical_slot_value_end (int): The index of the ending (inclusive) subword corresponding to the slot span for a non-categorical slot value + """ + return { + "logit_intent_status": NeuralType(('B', 'T', 'C'), LogitsType()), + "intent_status_labels": NeuralType(('B'), LabelsType()), + "logit_req_slot_status": NeuralType(('B', 'T'), LogitsType()), + "requested_slot_status": NeuralType(('B', 'T'), LabelsType()), + "req_slot_mask": NeuralType(('B', 'T'), ChannelType()), + "logit_cat_slot_status": NeuralType(('B', 'T', 'C'), LogitsType()), + "categorical_slot_status": NeuralType(('B', 'T'), LabelsType()), + "cat_slot_status_mask": NeuralType(('B', 'T'), ChannelType()), + "logit_cat_slot_value": NeuralType(('B', 'T', 'C'), LogitsType()), + "categorical_slot_values": NeuralType(('B', 'T'), LabelsType()), + "logit_noncat_slot_status": NeuralType(('B', 'T', 'C'), LogitsType()), + "noncategorical_slot_status": NeuralType(('B', 'T'), LabelsType()), + "noncat_slot_status_mask": NeuralType(('B', 'T'), ChannelType()), + "logit_noncat_slot_start": NeuralType(('B', 'T', 'C'), LogitsType()), + "logit_noncat_slot_end": NeuralType(('B', 'T', 'C'), LogitsType()), + "noncategorical_slot_value_start": NeuralType(('B', 'T'), LabelsType()), + "noncategorical_slot_value_end": NeuralType(('B', 'T'), LabelsType()), + } + + @property + def output_ports(self): + """ + Returns definitions of module output ports. 
+ loss: + NeuralType(None) + """ + return {"loss": NeuralType(None)} + + def __init__(self, reduction='mean'): + """ + Args: + reduction (str): specifies the reduction to apply to the final loss, choose 'mean' or 'sum' + """ + super().__init__() + + if reduction not in ['mean', 'sum']: + logging.warning(f'{reduction} reduction is not supported. Setting reduction to "mean"') + reduction = 'mean' + + self.reduction = reduction + self._cross_entropy = torch.nn.CrossEntropyLoss(reduction=self.reduction) + self._criterion_req_slots = torch.nn.BCEWithLogitsLoss(reduction=self.reduction) + + def _loss_function( + self, + logit_intent_status, + intent_status_labels, + logit_req_slot_status, + requested_slot_status, + req_slot_mask, + logit_cat_slot_status, + categorical_slot_status, + cat_slot_status_mask, + logit_cat_slot_value, + categorical_slot_values, + logit_noncat_slot_status, + noncategorical_slot_status, + noncat_slot_status_mask, + logit_noncat_slot_start, + logit_noncat_slot_end, + noncategorical_slot_value_start, + noncategorical_slot_value_end, + ): + # Intent loss + intent_loss = self._cross_entropy(logit_intent_status, intent_status_labels) + + # Requested slots. + # Shape: (batch_size, max_num_slots) + # mask unused slots + # Sigmoid cross entropy is used because more than one slots can be requested in a single utterance + req_slot_mask = req_slot_mask > 0.5 + requested_slot_loss = self._criterion_req_slots( + logit_req_slot_status[req_slot_mask], requested_slot_status[req_slot_mask] + ) + + # Categorical slot status + # Shape of logit_cat_slot_status: (batch_size, max_num_cat_slots, 3) + # cat_slot_status_mask masks unused categorical padded slots for the service + cat_slot_status_mask = cat_slot_status_mask.view(-1) > 0.5 + if sum(cat_slot_status_mask) == 0: + logging.warning(f'No categorical slots in the batch') + cat_slot_status_loss = torch.clamp(torch.max(logit_cat_slot_status.view(-1)), 0, 0) + else: + cat_slot_status_loss = self._cross_entropy( + logit_cat_slot_status.view(-1, 3)[cat_slot_status_mask], + categorical_slot_status.view(-1)[cat_slot_status_mask], + ) + + # Categorical slot values. + # Shape: (batch_size, max_num_cat_slots, max_num_slot_values). + max_num_slot_values = logit_cat_slot_value.size()[-1] + + # Zero out losses for categorical slot value when the slot status is not active. + cat_slot_value_mask = (categorical_slot_status == STATUS_ACTIVE).view(-1) + # to handle cases with no active categorical slot value + if sum(cat_slot_value_mask) == 0: + logging.warning(f'No active values for categorical slots in the batch.') + cat_slot_value_loss = torch.clamp(torch.max(logit_cat_slot_value.view(-1)), 0, 0) + else: + slot_values_active_logits = logit_cat_slot_value.view(-1, max_num_slot_values)[cat_slot_value_mask] + slot_values_active_labels = categorical_slot_values.view(-1)[cat_slot_value_mask] + cat_slot_value_loss = self._cross_entropy(slot_values_active_logits, slot_values_active_labels) + + # Non-categorical slot status. + # Shape: (batch_size, max_num_noncat_slots, 3). 
+ # noncat_slot_status_mask masks unused noncat slots for the service + noncat_slot_status_mask = noncat_slot_status_mask.view(-1) > 0.5 + if sum(noncat_slot_status_mask) == 0: + logging.warning(f'No active non-categorical slots in the batch.') + noncat_slot_status_loss = torch.clamp(torch.max(logit_noncat_slot_status.view(-1)), 0, 0) + else: + noncat_slot_status_loss = self._cross_entropy( + logit_noncat_slot_status.view(-1, 3)[noncat_slot_status_mask], + noncategorical_slot_status.view(-1)[noncat_slot_status_mask], + ) + + # Non-categorical slot spans. + # Shape: (batch_size, max_num_noncat_slots, max_num_tokens).n + max_num_tokens = logit_noncat_slot_start.size()[-1] + # Zero out losses for non-categorical slot spans when the slot status is not active. + # changed here + non_cat_slot_value_mask = (noncategorical_slot_status == STATUS_ACTIVE).view(-1) + # non_cat_slot_value_mask = (noncategorical_slot_status > -1 ).view(-1) + # to handle cases with no active categorical slot value + if sum(non_cat_slot_value_mask) == 0: + logging.warning(f'No active values for non-categorical slots in the batch.') + span_start_loss = torch.clamp(torch.max(logit_noncat_slot_start.view(-1)), 0, 0) + span_end_loss = torch.clamp(torch.max(logit_noncat_slot_end.view(-1)), 0, 0) + else: + noncat_slot_start_active_logits = logit_noncat_slot_start.view(-1, max_num_tokens)[non_cat_slot_value_mask] + noncat_slot_start_active_labels = noncategorical_slot_value_start.view(-1)[non_cat_slot_value_mask] + span_start_loss = self._cross_entropy(noncat_slot_start_active_logits, noncat_slot_start_active_labels) + + noncat_slot_end_active_logits = logit_noncat_slot_end.view(-1, max_num_tokens)[non_cat_slot_value_mask] + noncat_slot_end_active_labels = noncategorical_slot_value_end.view(-1)[non_cat_slot_value_mask] + span_end_loss = self._cross_entropy(noncat_slot_end_active_logits, noncat_slot_end_active_labels) + + losses = { + "intent_loss": intent_loss, + "requested_slot_loss": requested_slot_loss, + "cat_slot_status_loss": cat_slot_status_loss, + "cat_slot_value_loss": cat_slot_value_loss, + "noncat_slot_status_loss": noncat_slot_status_loss, + "span_start_loss": span_start_loss, + "span_end_loss": span_end_loss, + } + + total_loss = sum(losses.values()) + if self.reduction == 'mean': + total_loss = total_loss / len(losses) + else: + batch_size = logit_intent_status.shape[0] + total_loss = total_loss / batch_size + return total_loss diff --git a/nemo/collections/nlp/nm/losses/smoothed_cross_entropy_loss.py b/nemo/collections/nlp/nm/losses/smoothed_cross_entropy_loss.py index cecedece75de..b33a4c4b7611 100644 --- a/nemo/collections/nlp/nm/losses/smoothed_cross_entropy_loss.py +++ b/nemo/collections/nlp/nm/losses/smoothed_cross_entropy_loss.py @@ -16,10 +16,65 @@ import torch +from nemo.backends.pytorch import LossNM +from nemo.collections.nlp.utils.data_utils import mask_padded_tokens +from nemo.core import LabelsType, LogitsType, LossType, MaskType, NeuralType + __all__ = ['SmoothedCrossEntropyLoss'] -class SmoothedCrossEntropyLoss(torch.nn.Module): +class SmoothedCrossEntropyLoss(LossNM): + """ + Neural module which calculates CrossEntropyLoss and + 1) excludes padding tokens from loss calculation + 2) allows to use label smoothing regularization + 3) allows to calculate loss for the desired number of last tokens + + Args: + label_smoothing (float): label smoothing regularization coefficient + predict_last_k (int): how many last tokens to use for the loss + calculation, important for fast evaluation of LM perplexity + 
""" + + @property + def input_ports(self): + """Returns definitions of module input ports. + """ + return { + "logits": NeuralType(('B', 'T', 'D'), LogitsType()), + "labels": NeuralType(('B', 'T'), LabelsType()), + "output_mask": NeuralType(('B', 'T'), MaskType(), optional=True), + } + + @property + def output_ports(self): + """Returns definitions of module output ports. + """ + # return {"loss": NeuralType(None)} + return {"loss": NeuralType(elements_type=LossType())} + + def __init__(self, pad_id=None, label_smoothing=0, predict_last_k=0): + LossNM.__init__(self) + + self._loss_fn = SmoothedCrossEntropy(label_smoothing, predict_last_k) + self._pad_id = pad_id + + def _loss_function(self, logits, labels, output_mask=None): + if output_mask is not None: + labels_mask = output_mask + elif self._pad_id is not None: + labels_mask = mask_padded_tokens(labels, self._pad_id).to(logits.dtype) + else: + raise ValueError("Both output_mask and pad_id are None") + + if labels_mask.dtype is not logits.dtype: + labels_mask = labels_mask.to(logits.dtype) + + loss = self._loss_fn(logits, labels, labels_mask) + return loss + + +class SmoothedCrossEntropy(torch.nn.Module): """ Cross-entropy loss with label smoothing for a batch of sequences. @@ -41,16 +96,17 @@ def __init__(self, label_smoothing=0.0, predict_last_k=0): self._smoothing = label_smoothing self._predict_last_k = predict_last_k - def forward(self, logits, output_ids, output_mask, eps=1e-6): + def forward(self, logits, labels, output_mask, eps=1e-6): """ Args: - logits: float tensor of shape batch_size x seq_len x vocab_size - output_ids: int tensor of shape batch_size x seq_len + logits: float tensor of shape batch_size x seq_len x vocab_size, values should be log probabilities + labels: int tensor of shape batch_size x seq_len output_mask: binary tensor of shape batch_size x seq_len + eps: epsilon param to avoid divide by zero in loss calculation """ batch_size, seq_len, vocab_size = logits.size() smoothing = vocab_size * self._smoothing / (vocab_size - 1) - target_logits = logits.gather(2, output_ids.unsqueeze(2)).squeeze(2) + target_logits = logits.gather(2, labels.unsqueeze(2)).squeeze(2) smoothing_logits = logits.mean(dim=-1) neg_log_likelihood = (1.0 - smoothing) * target_logits + smoothing * smoothing_logits neg_log_likelihood = neg_log_likelihood[:, -self._predict_last_k :] diff --git a/nemo/collections/nlp/nm/losses/qa_squad_loss.py b/nemo/collections/nlp/nm/losses/spanning_loss.py similarity index 86% rename from nemo/collections/nlp/nm/losses/qa_squad_loss.py rename to nemo/collections/nlp/nm/losses/spanning_loss.py index 1237b9255edb..d0193725887f 100644 --- a/nemo/collections/nlp/nm/losses/qa_squad_loss.py +++ b/nemo/collections/nlp/nm/losses/spanning_loss.py @@ -18,11 +18,12 @@ from nemo.backends.pytorch import LossNM from nemo.core import ChannelType, LogitsType, LossType, NeuralType +from nemo.utils.decorators import add_port_docs -__all__ = ['QuestionAnsweringLoss'] +__all__ = ['SpanningLoss'] -class QuestionAnsweringLoss(LossNM): +class SpanningLoss(LossNM): """ Neural module which implements QuestionAnswering loss. Args: @@ -36,19 +37,18 @@ class QuestionAnsweringLoss(LossNM): """ @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. 
""" return { - # "logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), - # "start_positions": NeuralType({0: AxisType(BatchTag)}), - # "end_positions": NeuralType({0: AxisType(BatchTag)}), "logits": NeuralType(('B', 'T', 'D'), LogitsType()), "start_positions": NeuralType(tuple('B'), ChannelType()), "end_positions": NeuralType(tuple('B'), ChannelType()), } @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. @@ -66,9 +66,6 @@ def output_ports(self): 1: AxisType(TimeTag) """ return { - # "loss": NeuralType(None), - # "start_logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "end_logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), "loss": NeuralType(elements_type=LossType()), "start_logits": NeuralType(('B', 'T'), ChannelType()), "end_logits": NeuralType(('B', 'T'), ChannelType()), diff --git a/nemo/collections/nlp/nm/losses/token_classification_loss.py b/nemo/collections/nlp/nm/losses/token_classification_loss.py deleted file mode 100644 index e27c74e952a3..000000000000 --- a/nemo/collections/nlp/nm/losses/token_classification_loss.py +++ /dev/null @@ -1,74 +0,0 @@ -# ============================================================================= -# Copyright 2020 NVIDIA. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================= - -import torch -from torch import nn - -from nemo.backends.pytorch import LossNM -from nemo.core import ChannelType, LabelsType, LogitsType, LossType, NeuralType - -__all__ = ['TokenClassificationLoss'] - - -class TokenClassificationLoss(LossNM): - """ - Neural module which implements Token Classification loss. - - Args: - num_classes (int): number of classes in a classifier, e.g. size - of the vocabulary in language modeling objective - logits (float): output of the classifier - labels (long): ground truth labels - loss_mask (long): to differentiate from original tokens and paddings - """ - - @property - def input_ports(self): - """Returns definitions of module input ports. - """ - return { - # "logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), - # "labels": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "loss_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - "logits": NeuralType(('B', 'T', 'D'), LogitsType()), - "labels": NeuralType(('B', 'T'), LabelsType()), - "loss_mask": NeuralType(('B', 'T'), ChannelType()), - } - - @property - def output_ports(self): - """Returns definitions of module output ports. 
- - loss: - NeuralType(None) - """ - return {"loss": NeuralType(elements_type=LossType())} - - def __init__(self, num_classes, class_weights=None): - LossNM.__init__(self) - if class_weights: - class_weights = torch.FloatTensor(class_weights).to(self._device) - - self._criterion = nn.CrossEntropyLoss(weight=class_weights) - self.num_classes = num_classes - - def _loss_function(self, logits, labels, loss_mask): - active_loss = loss_mask.view(-1) > 0.5 - active_logits = logits.view(-1, self.num_classes)[active_loss] - active_labels = labels.view(-1)[active_loss] - - loss = self._criterion(active_logits, active_labels) - return loss diff --git a/nemo/collections/nlp/nm/trainables/common/__init__.py b/nemo/collections/nlp/nm/trainables/common/__init__.py index 30252d72690b..0061462d13fe 100644 --- a/nemo/collections/nlp/nm/trainables/common/__init__.py +++ b/nemo/collections/nlp/nm/trainables/common/__init__.py @@ -14,8 +14,16 @@ # limitations under the License. # ============================================================================= -import nemo.collections.nlp.nm.trainables.common.huggingface +from nemo.collections.nlp.nm.trainables.common.common_utils import * +from nemo.collections.nlp.nm.trainables.common.huggingface import * from nemo.collections.nlp.nm.trainables.common.sequence_classification_nm import * from nemo.collections.nlp.nm.trainables.common.sequence_regression_nm import * from nemo.collections.nlp.nm.trainables.common.token_classification_nm import * from nemo.collections.nlp.nm.trainables.common.transformer import * +from nemo.utils import logging + +try: + from nemo.collections.nlp.nm.trainables.common.megatron.megatron_utils import * + +except Exception as e: + logging.error('Failed to import Megatron utils: `{}` ({})'.format(str(e), type(e))) diff --git a/nemo/collections/nlp/nm/trainables/common/common_utils.py b/nemo/collections/nlp/nm/trainables/common/common_utils.py new file mode 100644 index 000000000000..4964269f65ed --- /dev/null +++ b/nemo/collections/nlp/nm/trainables/common/common_utils.py @@ -0,0 +1,86 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================= + +import json + +from nemo import logging +from nemo.collections.nlp.nm.trainables.common.huggingface.huggingface_utils import * + +try: + __megatron_utils_satisfied = True + from nemo.collections.nlp.nm.trainables.common.megatron.megatron_bert_nm import MegatronBERT + from nemo.collections.nlp.nm.trainables.common.megatron.megatron_utils import * + +except Exception as e: + logging.error('Failed to import Megatron Neural Module and utils: `{}` ({})'.format(str(e), type(e))) + __megatron_utils_satisfied = False + + +__all__ = ['get_pretrained_lm_models_list', 'get_pretrained_lm_model'] + + +def get_pretrained_lm_models_list(): + ''' + Returns the list of support pretrained models + ''' + if __megatron_utils_satisfied: + return get_megatron_lm_models_list() + get_huggingface_lm_models_list() + else: + return get_huggingface_lm_models_list() + + +def get_pretrained_lm_model(pretrained_model_name, config=None, vocab=None, checkpoint=None): + ''' + Returns pretrained model + Args: + pretrained_model_name (str): pretrained model name, for example, bert-base-uncased. + See the full list by calling get_pretrained_lm_models_list() + config (str): path to the model configuration file + vocab (str): path to the vocabulary file used during model training + checkpoint (str): path to the pretrained model checkpoint + Returns: + Pretrained model (NM) + ''' + if pretrained_model_name in get_huggingface_lm_models_list(): + model = get_huggingface_lm_model(bert_config=config, pretrained_model_name=pretrained_model_name) + elif __megatron_utils_satisfied and pretrained_model_name in get_megatron_lm_models_list(): + if pretrained_model_name == 'megatron-bert-cased' or pretrained_model_name == 'megatron-bert-uncased': + if not (config and checkpoint): + raise ValueError(f'Config file and pretrained checkpoint required for {pretrained_model_name}') + if not config: + config = get_megatron_config_file(pretrained_model_name) + if isinstance(config, str): + with open(config) as f: + config = json.load(f) + if not vocab: + vocab = get_megatron_vocab_file(pretrained_model_name) + if not checkpoint: + checkpoint = get_megatron_checkpoint(pretrained_model_name) + model = MegatronBERT( + model_name=pretrained_model_name, + vocab_file=vocab, + hidden_size=config['hidden-size'], + num_attention_heads=config['num-attention-heads'], + num_layers=config['num-layers'], + max_seq_length=config['max-seq-length'], + ) + else: + raise ValueError(f'{pretrained_model_name} is not supported') + + if checkpoint: + model.restore_from(checkpoint) + logging.info(f"{pretrained_model_name} model restored from {checkpoint}") + return model diff --git a/nemo/collections/nlp/nm/trainables/common/huggingface/__init__.py b/nemo/collections/nlp/nm/trainables/common/huggingface/__init__.py index d71ca17ce84b..f7ad8050443e 100644 --- a/nemo/collections/nlp/nm/trainables/common/huggingface/__init__.py +++ b/nemo/collections/nlp/nm/trainables/common/huggingface/__init__.py @@ -16,4 +16,5 @@ from nemo.collections.nlp.nm.trainables.common.huggingface.albert_nm import * from nemo.collections.nlp.nm.trainables.common.huggingface.bert_nm import * +from nemo.collections.nlp.nm.trainables.common.huggingface.huggingface_utils import * from nemo.collections.nlp.nm.trainables.common.huggingface.roberta_nm import * diff --git a/nemo/collections/nlp/nm/trainables/common/huggingface/albert_nm.py b/nemo/collections/nlp/nm/trainables/common/huggingface/albert_nm.py index 
9df214302072..b8d3603e6e22 100644 --- a/nemo/collections/nlp/nm/trainables/common/huggingface/albert_nm.py +++ b/nemo/collections/nlp/nm/trainables/common/huggingface/albert_nm.py @@ -1,5 +1,7 @@ # ============================================================================= # Copyright 2020 NVIDIA. All Rights Reserved. +# Copyright 2018 The Google AI Language Team Authors and +# The HuggingFace Inc. team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -18,7 +20,7 @@ from transformers import ( ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, - ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP, + ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST, AlbertConfig, AlbertModel, ) @@ -26,6 +28,7 @@ from nemo.backends.pytorch.nm import TrainableNM from nemo.core.neural_modules import PretrainedModelInfo from nemo.core.neural_types import ChannelType, NeuralType +from nemo.utils.decorators import add_port_docs __all__ = ['Albert'] @@ -52,14 +55,13 @@ class Albert(TrainableNM): """ @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. + input_ids: input token ids + token_type_ids: segment type ids + attention_mask: attention mask """ - # return { - # "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "token_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "attention_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # } return { "input_ids": NeuralType(('B', 'T'), ChannelType()), "token_type_ids": NeuralType(('B', 'T'), ChannelType()), @@ -67,10 +69,11 @@ def input_ports(self): } @property + @add_port_docs() def output_ports(self): - """Returns definitions of module output ports. + """Returns definitions of module input ports. + hidden_states: output embedding """ - # return {"hidden_states": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} return {"hidden_states": NeuralType(('B', 'T', 'D'), ChannelType())} def __init__( @@ -131,19 +134,12 @@ def __init__( self.add_module("albert", model) self.config = model.config - - # TK: storing config name in init_params instead. - # for key, value in self.config.to_dict().items(): - # self._local_parameters[key] = value - - # Store the only value that will be used externally - hidden_size. self._hidden_size = model.config.hidden_size @property def hidden_size(self): """ Property returning hidden size. - Returns: Hidden size. """ @@ -152,12 +148,12 @@ def hidden_size(self): @staticmethod def list_pretrained_models() -> Optional[List[PretrainedModelInfo]]: pretrained_models = [] - for key, value in ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP.items(): + for key in ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST: model_info = PretrainedModelInfo( pretrained_model_name=key, description="weights by HuggingFace", parameters=ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP[key], - location=value, + location="", ) pretrained_models.append(model_info) return pretrained_models diff --git a/nemo/collections/nlp/nm/trainables/common/huggingface/bert_nm.py b/nemo/collections/nlp/nm/trainables/common/huggingface/bert_nm.py index 28cc34a4cf0d..23a455cc94ad 100644 --- a/nemo/collections/nlp/nm/trainables/common/huggingface/bert_nm.py +++ b/nemo/collections/nlp/nm/trainables/common/huggingface/bert_nm.py @@ -1,5 +1,7 @@ # ============================================================================= # Copyright 2020 NVIDIA. All Rights Reserved. 
+# Copyright 2018 The Google AI Language Team Authors and +# The HuggingFace Inc. team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -16,11 +18,12 @@ from typing import List, Optional -from transformers import BERT_PRETRAINED_CONFIG_ARCHIVE_MAP, BERT_PRETRAINED_MODEL_ARCHIVE_MAP, BertConfig, BertModel +from transformers import BERT_PRETRAINED_CONFIG_ARCHIVE_MAP, BERT_PRETRAINED_MODEL_ARCHIVE_LIST, BertConfig, BertModel from nemo.backends.pytorch.nm import TrainableNM from nemo.core.neural_modules import PretrainedModelInfo from nemo.core.neural_types import ChannelType, NeuralType +from nemo.utils.decorators import add_port_docs __all__ = ['BERT'] @@ -47,23 +50,25 @@ class BERT(TrainableNM): """ @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. + input_ids: input token ids + token_type_ids: segment type ids + attention_mask: attention mask """ return { - # "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "token_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "attention_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), "input_ids": NeuralType(('B', 'T'), ChannelType()), "token_type_ids": NeuralType(('B', 'T'), ChannelType()), "attention_mask": NeuralType(('B', 'T'), ChannelType()), } @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. + hidden_states: output embedding """ - # return {"hidden_states": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} return {"hidden_states": NeuralType(('B', 'T', 'D'), ChannelType())} def __init__( @@ -77,7 +82,13 @@ def __init__( intermediate_size=3072, hidden_act="gelu", max_position_embeddings=512, + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + type_vocab_size=2, + initializer_range=0.02, + layer_norm_eps=1e-12, ): + super().__init__() # Check that only one of pretrained_model_name, config_filename, and @@ -124,12 +135,6 @@ def __init__( self.add_module("bert", model) self.config = model.config - - # TK: storing config name in init_params instead. - # for key, value in self.config.to_dict().items(): - # self._local_parameters[key] = value - - # Store the only value that will be used externally - hidden_size. self._hidden_size = model.config.hidden_size @property @@ -145,12 +150,12 @@ def hidden_size(self): @staticmethod def list_pretrained_models() -> Optional[List[PretrainedModelInfo]]: pretrained_models = [] - for key, value in BERT_PRETRAINED_MODEL_ARCHIVE_MAP.items(): + for key in BERT_PRETRAINED_MODEL_ARCHIVE_LIST: model_info = PretrainedModelInfo( pretrained_model_name=key, description="weights by HuggingFace", parameters=BERT_PRETRAINED_CONFIG_ARCHIVE_MAP[key], - location=value, + location="", ) pretrained_models.append(model_info) return pretrained_models diff --git a/nemo/collections/nlp/nm/trainables/common/huggingface/huggingface_utils.py b/nemo/collections/nlp/nm/trainables/common/huggingface/huggingface_utils.py new file mode 100644 index 000000000000..05c36e171f93 --- /dev/null +++ b/nemo/collections/nlp/nm/trainables/common/huggingface/huggingface_utils.py @@ -0,0 +1,57 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +from nemo.collections.nlp.nm.trainables.common.huggingface.albert_nm import Albert +from nemo.collections.nlp.nm.trainables.common.huggingface.bert_nm import BERT +from nemo.collections.nlp.nm.trainables.common.huggingface.roberta_nm import Roberta + +__all__ = ['MODELS', 'get_huggingface_lm_model', 'get_huggingface_lm_models_list'] + + +def get_huggingface_lm_model(pretrained_model_name, bert_config=None): + ''' + Returns the dict of special tokens associated with the model. + Args: + pretrained_mode_name ('str'): name of the pretrained model from the hugging face list, + for example: bert-base-cased + bert_config: path to model configuration file. + ''' + model_type = pretrained_model_name.split('-')[0] + if model_type in MODELS: + if bert_config: + return MODELS[model_type]['class'](config_filename=bert_config) + else: + return MODELS[model_type]['class'](pretrained_model_name=pretrained_model_name) + else: + raise ValueError(f'{pretrained_model_name} is not supported') + + +MODELS = { + 'bert': {'default': 'bert-base-uncased', 'class': BERT}, + 'roberta': {'default': 'roberta-base', 'class': Roberta}, + 'albert': {'default': 'albert-base-v2', 'class': Albert}, +} + + +def get_huggingface_lm_models_list(): + ''' + Returns the list of supported HuggingFace models + ''' + huggingface_models = [] + for model in MODELS: + model_names = [x.pretrained_model_name for x in MODELS[model]['class'].list_pretrained_models()] + huggingface_models.extend(model_names) + return huggingface_models diff --git a/nemo/collections/nlp/nm/trainables/common/huggingface/roberta_nm.py b/nemo/collections/nlp/nm/trainables/common/huggingface/roberta_nm.py index 2f0396172d3b..54a1dbf163f4 100644 --- a/nemo/collections/nlp/nm/trainables/common/huggingface/roberta_nm.py +++ b/nemo/collections/nlp/nm/trainables/common/huggingface/roberta_nm.py @@ -1,5 +1,7 @@ # ============================================================================= # Copyright 2020 NVIDIA. All Rights Reserved. +# Copyright 2018 The Google AI Language Team Authors and +# The HuggingFace Inc. team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -18,7 +20,7 @@ from transformers import ( ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP, - ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP, + ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST, RobertaConfig, RobertaModel, ) @@ -26,6 +28,7 @@ from nemo.backends.pytorch.nm import TrainableNM from nemo.core.neural_modules import PretrainedModelInfo from nemo.core.neural_types import ChannelType, NeuralType +from nemo.utils.decorators import add_port_docs __all__ = ['Roberta'] @@ -52,14 +55,13 @@ class Roberta(TrainableNM): """ @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. 
+ input_ids: input token ids + token_type_ids: segment type ids + attention_mask: attention mask """ - # return { - # "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "token_type_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "attention_mask": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # } return { "input_ids": NeuralType(('B', 'T'), ChannelType()), "token_type_ids": NeuralType(('B', 'T'), ChannelType()), @@ -67,10 +69,11 @@ def input_ports(self): } @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. + hidden_states: output embedding """ - # return {"hidden_states": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} return {"hidden_states": NeuralType(('B', 'T', 'D'), ChannelType())} def __init__( @@ -131,12 +134,6 @@ def __init__( self.add_module("roberta", model) self.config = model.config - - # TK: storing config name in init_params instead. - # for key, value in self.config.to_dict().items(): - # self._local_parameters[key] = value - - # Store the only value that will be used externally - hidden_size. self._hidden_size = model.config.hidden_size @property @@ -152,12 +149,12 @@ def hidden_size(self): @staticmethod def list_pretrained_models() -> Optional[List[PretrainedModelInfo]]: pretrained_models = [] - for key, value in ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP.items(): + for key in ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST: model_info = PretrainedModelInfo( pretrained_model_name=key, description="weights by HuggingFace", parameters=ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP[key], - location=value, + location="", ) pretrained_models.append(model_info) return pretrained_models diff --git a/nemo/collections/nlp/nm/trainables/common/megatron/__init__.py b/nemo/collections/nlp/nm/trainables/common/megatron/__init__.py new file mode 100644 index 000000000000..34bb64c10941 --- /dev/null +++ b/nemo/collections/nlp/nm/trainables/common/megatron/__init__.py @@ -0,0 +1,23 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +from nemo.utils import logging + +try: + from nemo.collections.nlp.nm.trainables.common.megatron.megatron_bert_nm import * + +except Exception as e: + logging.error('Failed to import Megatron Neural Module: `{}` ({})'.format(str(e), type(e))) diff --git a/nemo/collections/nlp/nm/trainables/common/megatron/megatron_bert_nm.py b/nemo/collections/nlp/nm/trainables/common/megatron/megatron_bert_nm.py new file mode 100644 index 000000000000..5a7f76854c66 --- /dev/null +++ b/nemo/collections/nlp/nm/trainables/common/megatron/megatron_bert_nm.py @@ -0,0 +1,140 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +import os + +import torch +from megatron.initialize import initialize_megatron +from megatron.model.bert_model import bert_attention_mask_func, bert_extended_attention_mask, bert_position_ids +from megatron.model.language_model import get_language_model +from megatron.model.utils import init_method_normal, scaled_init_method_normal + +from nemo.backends.pytorch.nm import TrainableNM +from nemo.core import DeviceType +from nemo.core.neural_types import ChannelType, NeuralType +from nemo.utils.decorators import add_port_docs + +__all__ = ['MegatronBERT'] + + +class MegatronBERT(TrainableNM): + """ + MegatronBERT wraps around the Megatron Language model + from https://github.com/NVIDIA/Megatron-LM + + Args: + config_file (str): path to model configuration file. + vocab_file (str): path to vocabulary file. + tokenizer_type (str): tokenizer type, currently only 'BertWordPieceLowerCase' supported. + """ + + @property + @add_port_docs() + def input_ports(self): + """Returns definitions of module input ports. + input_ids: input token ids + token_type_ids: segment type ids + attention_mask: attention mask + """ + return { + "input_ids": NeuralType(('B', 'T'), ChannelType()), + "attention_mask": NeuralType(('B', 'T'), ChannelType()), + "token_type_ids": NeuralType(('B', 'T'), ChannelType(), optional=True), + } + + @property + @add_port_docs() + def output_ports(self): + """Returns definitions of module output ports. + hidden_states: output embedding + """ + return {"hidden_states": NeuralType(('B', 'T', 'D'), ChannelType())} + + def __init__( + self, + model_name, + vocab_file, + hidden_size=1024, + num_attention_heads=16, + num_layers=24, + max_seq_length=512, + tokenizer_type='BertWordPieceLowerCase', + init_method_std=0.02, + num_tokentypes=2, + ): + + super().__init__() + + if not os.path.exists(vocab_file): + raise ValueError(f'Vocab file not found at {vocab_file}') + + megatron_args = { + "num_layers": num_layers, + "hidden_size": hidden_size, + "num_attention_heads": num_attention_heads, + "max_position_embeddings": max_seq_length, + "tokenizer_type": tokenizer_type, + "vocab_file": vocab_file, + } + + initialize_megatron(None, megatron_args, ignore_unknown_args=True) + init_method = init_method_normal(init_method_std) + + self.language_model, self._language_model_key = get_language_model( + attention_mask_func=bert_attention_mask_func, + num_tokentypes=num_tokentypes, + add_pooler=False, + init_method=init_method, + scaled_init_method=scaled_init_method_normal(init_method_std, num_layers), + ) + + self.language_model.to(self._device) + self._hidden_size = self.language_model.hidden_size + + @property + def hidden_size(self): + """ + Property returning hidden size. + + Returns: + Hidden size. 
+ """ + return self._hidden_size + + def forward(self, input_ids, attention_mask, token_type_ids): + extended_attention_mask = bert_extended_attention_mask( + attention_mask, next(self.language_model.parameters()).dtype + ) + position_ids = bert_position_ids(input_ids) + + sequence_output = self.language_model( + input_ids, position_ids, extended_attention_mask, tokentype_ids=token_type_ids + ) + return sequence_output + + def restore_from(self, path, local_rank=0): + if self.placement == DeviceType.AllGpu: + load_device = f"cuda:{local_rank}" + else: + load_device = self._device + + state_dict = torch.load(path, map_location=load_device) + + # to load from Megatron pretrained checkpoint + if 'model' in state_dict: + self.language_model.load_state_dict(state_dict['model'][self._language_model_key]) + else: + self.load_state_dict(state_dict) diff --git a/nemo/collections/nlp/nm/trainables/common/megatron/megatron_utils.py b/nemo/collections/nlp/nm/trainables/common/megatron/megatron_utils.py new file mode 100644 index 000000000000..49f5614126fc --- /dev/null +++ b/nemo/collections/nlp/nm/trainables/common/megatron/megatron_utils.py @@ -0,0 +1,121 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# Copyright 2020 The HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================= + +import os + +import torch +import wget +from transformers import TRANSFORMERS_CACHE, cached_path + +__all__ = [ + 'MEGATRON_CACHE', + 'MEGATRON_CONFIG_MAP', + 'CONFIGS', + 'get_megatron_lm_models_list', + 'get_megatron_config_file', + 'get_megatron_vocab_file', + 'get_megatron_checkpoint', +] + +MEGATRON_CACHE = os.path.join(os.path.dirname(str(TRANSFORMERS_CACHE)), 'megatron') + +CONFIGS = {'345m': {"hidden-size": 1024, "num-attention-heads": 16, "num-layers": 24, "max-seq-length": 512}} + +MEGATRON_CONFIG_MAP = { + 'megatron-bert-345m-uncased': { + 'config': CONFIGS['345m'], + 'checkpoint': 'https://api.ngc.nvidia.com/v2/models/nvidia/megatron_bert_345m/versions/v0.0/files/release/mp_rank_00/model_optim_rng.pt', + 'vocab': 'https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-vocab.txt', + 'do_lower_case': True, + }, + 'megatron-bert-uncased': { + 'config': None, + 'checkpoint': None, + 'vocab': 'https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-vocab.txt', + 'do_lower_case': True, + }, + 'megatron-bert-cased': { + 'config': None, + 'vocab': 'https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-vocab.txt', + 'do_lower_case': False, + }, +} + + +def get_megatron_lm_models_list(): + ''' + Return the list of support Megatron models + ''' + return list(MEGATRON_CONFIG_MAP.keys()) + + +def get_megatron_config_file(pretrained_model_name): + ''' + Returns model config file + Args: + pretrained_model_name (str): pretrained model name + Returns: + config (dict): contains model configuration: number of hidden layers, number of attention heads, etc + ''' + return MEGATRON_CONFIG_MAP[pretrained_model_name]['config'] + + +def get_megatron_vocab_file(pretrained_model_name): + ''' + Gets vocabulary file from cache or downloads it + Args: + pretrained_model_name (str): pretrained model name + Returns: + path (str): path to the vocab file + ''' + url = MEGATRON_CONFIG_MAP[pretrained_model_name]['vocab'] + path = cached_path(url, cache_dir=MEGATRON_CACHE) + return path + + +def get_megatron_checkpoint(pretrained_model_name): + ''' + Gets checkpoint file from cache or downloads it + Args: + pretrained_model_name (str): pretrained model name + Returns: + path (str): path to model checkpoint + ''' + url = MEGATRON_CONFIG_MAP[pretrained_model_name]['checkpoint'] + path = os.path.join(MEGATRON_CACHE, pretrained_model_name) + + if not os.path.exists(path): + master_device = not torch.distributed.is_initialized() or torch.distributed.get_rank() == 0 + if not os.path.exists(path): + if master_device: + wget.download(url, path) + # wait until the master process downloads the file and writes it to the cache dir + if torch.distributed.is_initialized(): + torch.distributed.barrier() + + return path + + +def is_lower_cased_megatron(pretrained_model_name): + ''' + Returns if the megatron is cased or uncased + Args: + pretrained_model_name (str): pretrained model name + Returns: + do_lower_cased (bool): whether the model uses lower cased data + ''' + return MEGATRON_CONFIG_MAP[pretrained_model_name]['do_lower_case'] diff --git a/nemo/collections/nlp/nm/trainables/common/sequence_classification_nm.py b/nemo/collections/nlp/nm/trainables/common/sequence_classification_nm.py index 60b1f2c45e7c..b233242536dc 100644 --- a/nemo/collections/nlp/nm/trainables/common/sequence_classification_nm.py +++ b/nemo/collections/nlp/nm/trainables/common/sequence_classification_nm.py @@ -17,8 +17,9 @@ 
from torch import nn as nn from nemo.backends.pytorch import MultiLayerPerceptron, TrainableNM -from nemo.collections.nlp.nm.trainables.common.transformer.transformer_utils import transformer_weights_init +from nemo.collections.nlp.utils.transformer_utils import transformer_weights_init from nemo.core import ChannelType, LogitsType, NeuralType +from nemo.utils.decorators import add_port_docs __all__ = ['SequenceClassifier'] @@ -36,19 +37,24 @@ class SequenceClassifier(TrainableNM): activation (str): activation function applied in classifier MLP layers log_softmax (bool): whether to apply log_softmax to MLP output dropout (float): dropout ratio applied to MLP + use_transformer_pretrained (bool): + TODO """ @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. + hidden_states: embedding hidden states """ return {"hidden_states": NeuralType(('B', 'T', 'D'), ChannelType())} @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. + logits: logits before loss """ - # return {"logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)})} return {"logits": NeuralType(('B', 'D'), LogitsType())} def __init__( diff --git a/nemo/collections/nlp/nm/trainables/common/sequence_regression_nm.py b/nemo/collections/nlp/nm/trainables/common/sequence_regression_nm.py index 0989afd162ad..ec681b86d3fa 100644 --- a/nemo/collections/nlp/nm/trainables/common/sequence_regression_nm.py +++ b/nemo/collections/nlp/nm/trainables/common/sequence_regression_nm.py @@ -17,8 +17,9 @@ from torch import nn as nn from nemo.backends.pytorch import MultiLayerPerceptron, TrainableNM -from nemo.collections.nlp.nm.trainables.common.transformer.transformer_utils import transformer_weights_init +from nemo.collections.nlp.utils.transformer_utils import transformer_weights_init from nemo.core import ChannelType, NeuralType, RegressionValuesType +from nemo.utils.decorators import add_port_docs __all__ = ['SequenceRegression'] @@ -34,20 +35,24 @@ class SequenceRegression(TrainableNM): num_layers (int): number of layers in classifier MLP activation (str): activation function applied in classifier MLP layers dropout (float): dropout ratio applied to MLP + use_transformer_pretrained (bool): + TODO """ @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. + hidden_states: embedding hidden states """ - # return {"hidden_states": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} return {"hidden_states": NeuralType(('B', 'T', 'D'), ChannelType())} @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. 
+ preds: predictions before loss """ - # return {"preds": NeuralType({0: AxisType(RegressionTag)})} return {"preds": NeuralType(tuple('B'), RegressionValuesType())} def __init__(self, hidden_size, num_layers=2, activation='relu', dropout=0.0, use_transformer_pretrained=True): diff --git a/nemo/collections/nlp/nm/trainables/common/token_classification_nm.py b/nemo/collections/nlp/nm/trainables/common/token_classification_nm.py index 1b4c879906c7..e101b411a5ee 100644 --- a/nemo/collections/nlp/nm/trainables/common/token_classification_nm.py +++ b/nemo/collections/nlp/nm/trainables/common/token_classification_nm.py @@ -17,8 +17,10 @@ from torch import nn as nn from nemo.backends.pytorch import MultiLayerPerceptron, TrainableNM -from nemo.collections.nlp.nm.trainables.common.transformer.transformer_utils import gelu, transformer_weights_init +from nemo.collections.nlp.utils.functional_utils import gelu +from nemo.collections.nlp.utils.transformer_utils import transformer_weights_init from nemo.core import ChannelType, LogitsType, NeuralType +from nemo.utils.decorators import add_port_docs __all__ = ['BertTokenClassifier', 'TokenClassifier'] @@ -37,20 +39,24 @@ class BertTokenClassifier(TrainableNM): activation (str): activation function applied in classifier MLP layers log_softmax (bool): whether to apply log_softmax to MLP output dropout (float): dropout ratio applied to MLP + use_transformer_pretrained (bool): + TODO """ @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. + hidden_states: embedding hidden states """ - # return {"hidden_states": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} return {"hidden_states": NeuralType(('B', 'T', 'D'), ChannelType())} @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. + logits: logits before loss """ - # return {"logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} return {"logits": NeuralType(('B', 'T', 'C'), LogitsType())} def __init__( @@ -101,17 +107,17 @@ class TokenClassifier(TrainableNM): """ @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. """ - # return {"hidden_states": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} return {"hidden_states": NeuralType(('B', 'T', 'C'), ChannelType())} @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. """ - # return {"logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} return {"logits": NeuralType(('B', 'T', 'D'), LogitsType())} def __init__( @@ -125,9 +131,9 @@ def __init__( dropout=0.0, use_transformer_pretrained=True, ): - super().__init__() + # Pass name up the module class hierarchy. 
+ super().__init__(name=name) - self.name = name self.mlp = MultiLayerPerceptron(hidden_size, num_classes, self._device, num_layers, activation, log_softmax) self.dropout = nn.Dropout(dropout) if use_transformer_pretrained: @@ -135,11 +141,7 @@ def __init__( # self.to(self._device) # sometimes this is necessary def __str__(self): - name = TrainableNM.__str__(self) - - if self.name: - name = self.name + name - return name + return self.name def forward(self, hidden_states): hidden_states = self.dropout(hidden_states) diff --git a/nemo/collections/nlp/nm/trainables/common/transformer/transformer_decoders.py b/nemo/collections/nlp/nm/trainables/common/transformer/transformer_decoders.py index 1f3cbf0e4f44..d8baf206f97f 100644 --- a/nemo/collections/nlp/nm/trainables/common/transformer/transformer_decoders.py +++ b/nemo/collections/nlp/nm/trainables/common/transformer/transformer_decoders.py @@ -1,3 +1,20 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + import copy import torch @@ -7,7 +24,7 @@ MultiHeadAttention, PositionWiseFF, ) -from nemo.collections.nlp.nm.trainables.common.transformer.transformer_utils import form_attention_mask +from nemo.collections.nlp.utils.transformer_utils import form_attention_mask __all__ = [] diff --git a/nemo/collections/nlp/nm/trainables/common/transformer/transformer_encoders.py b/nemo/collections/nlp/nm/trainables/common/transformer/transformer_encoders.py index 24c6afce55ad..8254bf3f38bd 100644 --- a/nemo/collections/nlp/nm/trainables/common/transformer/transformer_encoders.py +++ b/nemo/collections/nlp/nm/trainables/common/transformer/transformer_encoders.py @@ -1,3 +1,20 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
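The classifier heads above all follow the same pattern: dropout over the BERT hidden states, then an MLP that maps each position to class logits. A self-contained sketch with a plain `nn.Linear` standing in for NeMo's `MultiLayerPerceptron`:

```python
import torch
from torch import nn


class SimpleTokenClassifierHead(nn.Module):
    """Minimal stand-in for a token classification head over BERT hidden states."""

    def __init__(self, hidden_size, num_classes, dropout=0.0):
        super().__init__()
        self.dropout = nn.Dropout(dropout)
        self.classifier = nn.Linear(hidden_size, num_classes)

    def forward(self, hidden_states):            # (B, T, D) hidden states
        hidden_states = self.dropout(hidden_states)
        return self.classifier(hidden_states)    # (B, T, C) logits, one vector per token


# Example: batch of 2 sequences, 8 tokens, hidden size 16, 5 tags.
head = SimpleTokenClassifierHead(hidden_size=16, num_classes=5, dropout=0.1)
logits = head(torch.randn(2, 8, 16))
assert logits.shape == (2, 8, 5)
```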
+# ============================================================================= + import copy import torch @@ -8,7 +25,7 @@ PositionWiseFF, TwoStreamSelfAttention, ) -from nemo.collections.nlp.nm.trainables.common.transformer.transformer_utils import form_attention_mask +from nemo.collections.nlp.utils.transformer_utils import form_attention_mask __all__ = [] diff --git a/nemo/collections/nlp/nm/trainables/common/transformer/transformer_generators.py b/nemo/collections/nlp/nm/trainables/common/transformer/transformer_generators.py index d878ccd17655..1a0dbc2d47dd 100644 --- a/nemo/collections/nlp/nm/trainables/common/transformer/transformer_generators.py +++ b/nemo/collections/nlp/nm/trainables/common/transformer/transformer_generators.py @@ -1,13 +1,49 @@ -__all__ = [] +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= import torch import torch.nn as nn -from nemo.collections.nlp.nm.trainables.common.transformer.transformer_utils import NEG_INF -from nemo.collections.nlp.utils.common_nlp_utils import mask_padded_tokens +from nemo.collections.nlp.utils.data_utils import mask_padded_tokens +from nemo.collections.nlp.utils.transformer_utils import NEG_INF + +__all__ = [] class GreedySequenceGenerator(nn.Module): + """ + Greedy sequence generator based on the decoder followed by log_softmax. + + Args: + embedding: nn.Module, transforms input_ids into vector embeddings + decoder: nn.Module, takes embeddings and produces hidden_states + log_softmax: nn.Module, takes hidden_states and produces log_probs + which correspond to probability distribution of tokens (ids) + pad: index of padding token in the vocabulary + bos: index of beginning of sequence token in the vocabulary + eos: index of end of sequence token in the vocabulary + max_sequence_length: maximum allowed length for generated sequences + max_delta_length: in case of encoder-decoder generation (e.g. NMT), + forbids generated sequences to be longer than the length of + source sequences plus max_delta_length + batch_size: size of the batch of generated sequences if neither + source nor target starting sequences are provided + """ + def __init__( self, embedding, @@ -20,25 +56,6 @@ def __init__( max_delta_length=20, batch_size=1, ): - """ - Greedy sequence generator based on the decoder followed by log_softmax. 
- - Args: - embedding: nn.Module, transforms input_ids into vector embeddings - decoder: nn.Module, takes embeddings and produces hidden_states - log_softmax: nn.Module, takes hidden_states and produces log_probs - which correspond to probability distribution of tokens (ids) - pad: index of padding token in the vocabulary - bos: index of beginning of sequence token in the vocabulary - eos: index of end of sequence token in the vocabulary - max_sequence_length: maximum allowed length for generated sequences - max_delta_length: in case of encoder-decoder generation (e.g. NMT), - forbids generated sequences to be longer than the length of - source sequences plus max_delta_length - batch_size: size of the batch of generated sequences if neither - source nor target starting sequences are provided - """ - super().__init__() self.embedding = embedding self.decoder = decoder @@ -148,20 +165,20 @@ def forward(self, decoder_input_ids=None, encoder_hidden_states=None, encoder_in class TopKSequenceGenerator(GreedySequenceGenerator): - def __init__(self, embedding, decoder, log_softmax, beam_size=1, temperature=1.0, **kwargs): - """ - Top-k sequence generator based on the decoder followed by log_softmax. - - Args: - *all args of GreedySequenceGenerator class - beam_size: size of the beam (parameter k in top-k) - temperature: temperature of top-k sampling, all logits are divided - by temperature before rescaling. High temperature leads to - uniform distribution, low leads to delta-like distribution. - Kwargs: - all remaining parameters of GreedySequenceGenerator class - """ + """ + Top-k sequence generator based on the decoder followed by log_softmax. + + Args: + *all args of GreedySequenceGenerator class + beam_size: size of the beam (parameter k in top-k) + temperature: temperature of top-k sampling, all logits are divided + by temperature before rescaling. High temperature leads to + uniform distribution, low leads to delta-like distribution. + Kwargs: + all remaining parameters of GreedySequenceGenerator class + """ + def __init__(self, embedding, decoder, log_softmax, beam_size=1, temperature=1.0, **kwargs): super().__init__(embedding, decoder, log_softmax, **kwargs) self.beam_size = beam_size self.temp = temperature diff --git a/nemo/collections/nlp/nm/trainables/common/transformer/transformer_modules.py b/nemo/collections/nlp/nm/trainables/common/transformer/transformer_modules.py index 153843e1aad0..50e66346e017 100644 --- a/nemo/collections/nlp/nm/trainables/common/transformer/transformer_modules.py +++ b/nemo/collections/nlp/nm/trainables/common/transformer/transformer_modules.py @@ -1,27 +1,19 @@ -# coding=utf-8 -""" -Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. -Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -Various parts of Transformer architecture implemented as Pytorch nn.Modules. 
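The relocated `TopKSequenceGenerator` docstring above describes temperature-scaled top-k sampling. A minimal sketch of a single decoding step; this is not the module's actual `forward`, which also handles padding and incremental decoding state:

```python
import torch


def sample_top_k(logits, beam_size=1, temperature=1.0):
    """One step of top-k sampling with temperature (illustrative sketch).

    logits: (batch, vocab) unnormalized scores for the next token.
    """
    scaled = logits / temperature                            # temperature scaling
    topk_scores, topk_ids = scaled.topk(beam_size, dim=-1)   # keep only the k best tokens
    probs = torch.softmax(topk_scores, dim=-1)               # renormalize over the k candidates
    choice = torch.multinomial(probs, num_samples=1)         # sample within the top-k set
    return topk_ids.gather(-1, choice).squeeze(-1)           # map back to vocabulary ids


next_tokens = sample_top_k(torch.randn(4, 32000), beam_size=5, temperature=0.8)
```

As the docstring says, a high temperature flattens the distribution over the k candidates, while a low temperature concentrates it on the single best token.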
-Some parts of this code were adapted from the HuggingFace library at -https://github.com/huggingface/pytorch-pretrained-BERT -Some parts of this code were adapted from the Annotated Transformer at -http://nlp.seas.harvard.edu/2018/04/03/attention.html -Copyright by the HuggingFace and Annotated Transformer authors. -""" +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= import math @@ -29,18 +21,19 @@ from torch import nn from nemo import logging -from nemo.collections.nlp.nm.trainables.common.transformer.transformer_utils import gelu +from nemo.collections.nlp.utils.functional_utils import gelu __all__ = [] try: from apex.normalization import FusedLayerNorm -except (AttributeError, ModuleNotFoundError): - # this is lie - it isn't fused in this case - logging.warning( - "Unable to import APEX. Mixed precision, distributed training and " "FusedLayerNorm are not available." - ) + + # Try to use FusedLayerNorm from Apex - this will trigger an error. + _ = FusedLayerNorm(8, eps=1e-5) + +except Exception as e: + logging.warning("Unable to import FusedLayerNorm from APEX. Using regular LayerNorm instead.") from torch.nn import LayerNorm as FusedLayerNorm @@ -108,9 +101,7 @@ def __init__( def forward(self, input_ids, token_type_ids=None, start_pos=0): seq_length = input_ids.size(1) if seq_length > self.max_sequence_length: - raise ValueError( - "Input sequence is longer than maximum allowed" " sequence length for positional encoding" - ) + raise ValueError("Input sequence is longer than maximum allowed sequence length for positional encoding") position_ids = torch.arange( start=start_pos, end=start_pos + seq_length, dtype=torch.long, device=input_ids.device ) diff --git a/nemo/collections/nlp/nm/trainables/common/transformer/transformer_nm.py b/nemo/collections/nlp/nm/trainables/common/transformer/transformer_nm.py index db858982adb1..ba9cc7128f23 100644 --- a/nemo/collections/nlp/nm/trainables/common/transformer/transformer_nm.py +++ b/nemo/collections/nlp/nm/trainables/common/transformer/transformer_nm.py @@ -1,7 +1,20 @@ -# Copyright (c) 2019 NVIDIA Corporation -""" -This package contains Transformer for translation Neural Module -""" +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# Copyright 2018 The Google AI Language Team Authors and The HugginFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
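The import-guard change above does more than catch a missing `apex` package: constructing a throwaway `FusedLayerNorm` forces the fused CUDA extension to load, so an installation with broken kernels also falls back cleanly to `torch.nn.LayerNorm`. The pattern in isolation (a sketch, using plain `logging` instead of NeMo's logger):

```python
import logging

try:
    from apex.normalization import FusedLayerNorm

    # Probe construction: fails here if the fused CUDA kernel cannot be loaded,
    # even when the apex package itself imports fine.
    _ = FusedLayerNorm(8, eps=1e-5)
except Exception:
    logging.warning("Unable to import FusedLayerNorm from APEX. Using torch.nn.LayerNorm instead.")
    from torch.nn import LayerNorm as FusedLayerNorm

norm = FusedLayerNorm(256)  # same call site regardless of which backend was selected
```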
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + import math @@ -13,8 +26,9 @@ GreedySequenceGenerator, ) from nemo.collections.nlp.nm.trainables.common.transformer.transformer_modules import TransformerEmbedding -from nemo.collections.nlp.nm.trainables.common.transformer.transformer_utils import transformer_weights_init +from nemo.collections.nlp.utils.transformer_utils import transformer_weights_init from nemo.core.neural_types import ChannelType, NeuralType +from nemo.utils.decorators import add_port_docs __all__ = ['TransformerEncoderNM', 'TransformerDecoderNM', 'GreedyLanguageGeneratorNM', 'BeamSearchTranslatorNM'] @@ -45,22 +59,23 @@ class TransformerEncoderNM(TrainableNM): """ @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. + input_ids: ids of input tokens + input_mask_src: input mask """ return { - # "input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "input_mask_src": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), "input_ids": NeuralType(('B', 'T'), ChannelType()), "input_mask_src": NeuralType(('B', 'T'), ChannelType()), } @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. - + hidden_states: outputs hidden states """ - # return {"hidden_states": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} return {"hidden_states": NeuralType(('B', 'T', 'D'), ChannelType())} def __init__( @@ -134,14 +149,15 @@ class TransformerDecoderNM(TrainableNM): """ @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. + input_ids_tgt: ids of target sequence + hidden_states_src: input hidden states + input_mask_src: input token mask + input_mask_tgt: target token mask """ return { - # "input_ids_tgt": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "hidden_states_src": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), - # "input_mask_src": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # "input_mask_tgt": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), "input_ids_tgt": NeuralType(('B', 'T'), ChannelType()), "hidden_states_src": NeuralType(('B', 'T', 'D'), ChannelType()), "input_mask_src": NeuralType(('B', 'T'), ChannelType()), @@ -149,10 +165,11 @@ def input_ports(self): } @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. + hidden_states: output hidden states """ - # return {"hidden_states": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} return {"hidden_states": NeuralType(('B', 'T', 'D'), ChannelType())} def __init__( @@ -216,17 +233,19 @@ class GreedyLanguageGeneratorNM(TrainableNM): """ @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. 
+ input_ids: input ids """ - # return {"input_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)})} return {"input_ids": NeuralType(('B', 'T'), ChannelType())} @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. + output ids: output ids """ - # return {"output_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)})} return {"output_ids": NeuralType(('B', 'T'), ChannelType())} def __init__(self, decoder, log_softmax, max_seq_length, pad_token, bos_token, eos_token, batch_size=1): @@ -272,21 +291,23 @@ class BeamSearchTranslatorNM(TrainableNM): """ @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. + hidden_states_src: input hidden states + input_mask_src: input mask """ return { - # "hidden_states_src": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), - # "input_mask_src": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), "hidden_states_src": NeuralType(('B', 'T', 'C'), ChannelType()), "input_mask_src": NeuralType(('B', 'T'), ChannelType()), } @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. + output_ids: output ids """ - # return {"output_ids": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)})} return {"output_ids": NeuralType(('B', 'T'), ChannelType())} @property diff --git a/nemo/collections/nlp/nm/trainables/dialogue_state_tracking/__init__.py b/nemo/collections/nlp/nm/trainables/dialogue_state_tracking/__init__.py index 80c44c46c050..05f3cde4c1ce 100644 --- a/nemo/collections/nlp/nm/trainables/dialogue_state_tracking/__init__.py +++ b/nemo/collections/nlp/nm/trainables/dialogue_state_tracking/__init__.py @@ -13,5 +13,5 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================= - -from nemo.collections.nlp.nm.trainables.dialogue_state_tracking.state_tracking_trade_nm import * +from nemo.collections.nlp.nm.trainables.dialogue_state_tracking.sgd import * +from nemo.collections.nlp.nm.trainables.dialogue_state_tracking.trade_generator_nm import * diff --git a/nemo/collections/nlp/nm/trainables/dialogue_state_tracking/sgd/__init__.py b/nemo/collections/nlp/nm/trainables/dialogue_state_tracking/sgd/__init__.py new file mode 100644 index 000000000000..7f7de4a67ec5 --- /dev/null +++ b/nemo/collections/nlp/nm/trainables/dialogue_state_tracking/sgd/__init__.py @@ -0,0 +1,18 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================= + +from nemo.collections.nlp.nm.trainables.dialogue_state_tracking.sgd.sgd_decoder_nm import * +from nemo.collections.nlp.nm.trainables.dialogue_state_tracking.sgd.sgd_encoder_nm import * diff --git a/nemo/collections/nlp/nm/trainables/dialogue_state_tracking/sgd/sgd_decoder_nm.py b/nemo/collections/nlp/nm/trainables/dialogue_state_tracking/sgd/sgd_decoder_nm.py new file mode 100644 index 000000000000..0427e363e204 --- /dev/null +++ b/nemo/collections/nlp/nm/trainables/dialogue_state_tracking/sgd/sgd_decoder_nm.py @@ -0,0 +1,435 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# Copyright 2019 The Google Research Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +''' +This file contains code artifacts adapted from the original implementation: +https://github.com/google-research/google-research/blob/master/schema_guided_dst/baseline/train_and_predict.py +''' + +import math + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F + +from nemo.backends.pytorch.nm import TrainableNM +from nemo.core import ChannelType, EmbeddedTextType, LogitsType, NeuralType +from nemo.utils.decorators import add_port_docs + +__all__ = ['SGDDecoderNM'] + + +class LogitsAttention(nn.Module): + def __init__(self, num_classes, embedding_dim): + """Get logits for elements by using attention on token embedding. + Args: + num_classes (int): An int containing the number of classes for which logits are to be generated. + embedding_dim (int): hidden size of the BERT + + Returns: + A tensor of shape (batch_size, num_elements, num_classes) containing the logits. 
+ """ + super().__init__() + self.num_attention_heads = 16 + self.attention_head_size = embedding_dim // self.num_attention_heads + self.embedding_dim = embedding_dim + self.num_classes = num_classes + self.dropout = nn.Dropout(0.1) + + self.key = nn.Linear(embedding_dim, embedding_dim) + self.query = nn.Linear(embedding_dim, embedding_dim) + self.value = nn.Linear(embedding_dim, embedding_dim) + self.layer = nn.Linear(embedding_dim, num_classes) + + def transpose_for_scores(self, x): + new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size) + x = x.view(*new_x_shape) + return x.permute(0, 2, 1, 3) + + def forward(self, encoded_utterance, token_embeddings, element_embeddings, utterance_mask): + """ + token_embeddings: token hidden states from BERT encoding of the utterance + encoded_utterance: [CLS] token hidden state from BERT encoding of the utterance + element_embeddings: A tensor of shape (batch_size, num_elements, embedding_dim) extracted from schema + utterance_mask: binary mask for token_embeddings, 1 for real tokens 0 for padded tokens + """ + _, num_elements, _ = element_embeddings.size() + + query_layer = self.query(element_embeddings) + key_layer = self.key(token_embeddings) + value_layer = self.value(token_embeddings) + + query_layer = self.transpose_for_scores(query_layer) + key_layer = self.transpose_for_scores(key_layer) + value_layer = self.transpose_for_scores(value_layer) + attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2)) + attention_scores = attention_scores / math.sqrt(self.attention_head_size) + if utterance_mask is not None: + negative_scores = (torch.finfo(attention_scores.dtype).max * -0.7) * torch.ones_like(attention_scores) + new_x_shape = (utterance_mask.size()[0],) + (1, 1) + (utterance_mask.size()[1],) + attention_scores = torch.where( + utterance_mask.view(*new_x_shape).to(bool), attention_scores, negative_scores + ) + + attention_probs = nn.Softmax(dim=-1)(attention_scores) + attention_probs = self.dropout(attention_probs) + context_layer = torch.matmul(attention_probs, value_layer) + context_layer = context_layer.permute(0, 2, 1, 3).contiguous() + new_context_layer_shape = context_layer.size()[:-2] + (self.embedding_dim,) + context_layer = context_layer.view(*new_context_layer_shape) + + logits = self.layer(context_layer) + return logits + + +class Logits(nn.Module): + def __init__(self, num_classes, embedding_dim): + """Get logits for elements by conditioning on utterance embedding. + Args: + num_classes (int): An int containing the number of classes for which logits are to be generated. + embedding_dim (int): hidden size of the BERT + + Returns: + A tensor of shape (batch_size, num_elements, num_classes) containing the logits. + """ + super().__init__() + self.num_classes = num_classes + self.utterance_proj = nn.Linear(embedding_dim, embedding_dim) + self.activation = F.gelu + + self.layer1 = nn.Linear(2 * embedding_dim, embedding_dim) + self.layer2 = nn.Linear(embedding_dim, num_classes) + + def forward(self, encoded_utterance, token_embeddings, element_embeddings, utterance_mask): + """ + token_embeddings - token hidden states from BERT encoding of the utterance. Not used + encoded_utterance - [CLS] token hidden state from BERT encoding of the utterance + element_embeddings: A tensor of shape (batch_size, num_elements, embedding_dim). + utterance_mask: binary mask for token_embeddings, 1 for real tokens 0 for padded tokens. 
Not used + """ + _, num_elements, _ = element_embeddings.size() + + # Project the utterance embeddings. + utterance_embedding = self.utterance_proj(encoded_utterance) + utterance_embedding = self.activation(utterance_embedding) + + # Combine the utterance and element embeddings. + repeated_utterance_embedding = utterance_embedding.unsqueeze(1).repeat(1, num_elements, 1) + + utterance_element_emb = torch.cat([repeated_utterance_embedding, element_embeddings], axis=2) + logits = self.layer1(utterance_element_emb) + logits = self.activation(logits) + logits = self.layer2(logits) + return logits + + +class SGDDecoderNM(TrainableNM): + """ + Baseline model for schema guided dialogue state tracking with option to make schema embeddings learnable + """ + + @property + @add_port_docs() + def input_ports(self): + """Returns definitions of module output ports. + encoded_utterance (float): [CLS] token hidden state from BERT encoding of the utterance + token_embeddings (float): BERT encoding of utterance (all tokens) + utterance_mask (bool): Mask which takes the value 0 for padded tokens and 1 otherwise + cat_slot_values_mask (int): Masks out categorical slots values for slots not used in the service, takes values 0 and 1 + intent_status_mask (int): Masks out padded intents in the service, takes values 0 and 1 + service_ids (int): service ids + """ + return { + "encoded_utterance": NeuralType(('B', 'T'), EmbeddedTextType()), + "token_embeddings": NeuralType(('B', 'T', 'C'), ChannelType()), + "utterance_mask": NeuralType(('B', 'T'), ChannelType()), + "cat_slot_values_mask": NeuralType(('B', 'T', 'C'), ChannelType()), + "intent_status_mask": NeuralType(('B', 'T'), ChannelType()), + "service_ids": NeuralType(('B'), ChannelType()), + } + + @property + @add_port_docs() + def output_ports(self): + """Returns definitions of module output ports. + logit_intent_status (float): output for intent status + logit_req_slot_status (float): output for requested slots status + logit_cat_slot_status (float): output for categorical slots status + logit_cat_slot_value (float): output for categorical slots values + logit_noncat_slot_status (float): Output of SGD model + logit_noncat_slot_start (float): output for non categorical slots values start + logit_noncat_slot_end (float): output for non categorical slots values end + """ + return { + "logit_intent_status": NeuralType(('B', 'T', 'C'), LogitsType()), + "logit_req_slot_status": NeuralType(('B', 'T'), LogitsType()), + "logit_cat_slot_status": NeuralType(('B', 'T', 'C'), LogitsType()), + "logit_cat_slot_value": NeuralType(('B', 'T', 'C'), LogitsType()), + "logit_noncat_slot_status": NeuralType(('B', 'T', 'C'), LogitsType()), + "logit_noncat_slot_start": NeuralType(('B', 'T', 'C'), LogitsType()), + "logit_noncat_slot_end": NeuralType(('B', 'T', 'C'), LogitsType()), + } + + def __init__(self, embedding_dim, schema_emb_processor, add_attention_head=False): + """Get logits for elements by conditioning on utterance embedding. 
+ + Args: + embedding_dim (int): hidden size of the BERT + schema_emb_processor (obj): contains schema embeddings for services and config file + head_transform (str): transformation to use for computing head + """ + super().__init__() + + # Add a trainable vector for the NONE intent + self.none_intent_vector = torch.empty((1, 1, embedding_dim), requires_grad=True).to(self._device) + # TODO truncated norm init + nn.init.normal_(self.none_intent_vector, std=0.02) + self.none_intent_vector = torch.nn.Parameter(self.none_intent_vector).to(self._device) + + if add_attention_head: + projection_module = LogitsAttention + else: + projection_module = Logits + + self.intent_layer = projection_module(1, embedding_dim).to(self._device) + self.requested_slots_layer = projection_module(1, embedding_dim).to(self._device) + + self.cat_slot_value_layer = projection_module(1, embedding_dim).to(self._device) + + # Slot status values: none, dontcare, active. + self.cat_slot_status_layer = projection_module(3, embedding_dim).to(self._device) + self.noncat_slot_layer = projection_module(3, embedding_dim).to(self._device) + + # dim 2 for non_categorical slot - to represent start and end position + self.noncat_layer1 = nn.Linear(2 * embedding_dim, embedding_dim).to(self._device) + self.noncat_activation = F.gelu + self.noncat_layer2 = nn.Linear(embedding_dim, 2).to(self._device) + + config = schema_emb_processor.schema_config + num_services = len(schema_emb_processor.schemas.services) + self.intents_emb = nn.Embedding(num_services, config["MAX_NUM_INTENT"] * embedding_dim) + self.cat_slot_emb = nn.Embedding(num_services, config["MAX_NUM_CAT_SLOT"] * embedding_dim) + self.cat_slot_value_emb = nn.Embedding( + num_services, config["MAX_NUM_CAT_SLOT"] * config["MAX_NUM_VALUE_PER_CAT_SLOT"] * embedding_dim + ) + self.noncat_slot_emb = nn.Embedding(num_services, config["MAX_NUM_NONCAT_SLOT"] * embedding_dim) + self.req_slot_emb = nn.Embedding( + num_services, (config["MAX_NUM_CAT_SLOT"] + config["MAX_NUM_NONCAT_SLOT"]) * embedding_dim + ) + + # initialize schema embeddings from the BERT generated embeddings + schema_embeddings = schema_emb_processor.get_schema_embeddings() + self.intents_emb.weight.data.copy_( + torch.from_numpy(np.stack(schema_embeddings['intent_emb']).reshape(num_services, -1)) + ) + self.cat_slot_emb.weight.data.copy_( + torch.from_numpy(np.stack(schema_embeddings['cat_slot_emb']).reshape(num_services, -1)) + ) + self.cat_slot_value_emb.weight.data.copy_( + torch.from_numpy(np.stack(schema_embeddings['cat_slot_value_emb']).reshape(num_services, -1)) + ) + self.noncat_slot_emb.weight.data.copy_( + torch.from_numpy(np.stack(schema_embeddings['noncat_slot_emb']).reshape(num_services, -1)) + ) + self.req_slot_emb.weight.data.copy_( + torch.from_numpy(np.stack(schema_embeddings['req_slot_emb']).reshape(num_services, -1)) + ) + + if not schema_emb_processor.is_trainable: + self.intents_emb.weight.requires_grad = False + self.cat_slot_emb.weight.requires_grad = False + self.cat_slot_value_emb.weight.requires_grad = False + self.noncat_slot_emb.weight.requires_grad = False + self.req_slot_emb.weight.requires_grad = False + + self.to(self._device) + + def forward( + self, + encoded_utterance, + token_embeddings, + utterance_mask, + cat_slot_values_mask, + service_ids, + intent_status_mask, + ): + batch_size, emb_dim = encoded_utterance.size() + intent_embeddings = self.intents_emb(service_ids).view(batch_size, -1, emb_dim) + cat_slot_emb = self.cat_slot_emb(service_ids).view(batch_size, -1, emb_dim) + 
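`LogitsAttention` above masks padded utterance positions by overwriting their attention scores with a large negative, but finite, constant before the softmax; keeping the value finite avoids overflow and NaN issues in reduced precision. A condensed sketch of that step, assuming a (batch, seq_len) mask with 1 for real tokens:

```python
import math

import torch


def masked_attention_probs(query, key, mask):
    """Attention weights with padded key positions suppressed (illustrative sketch).

    query: (B, heads, Q, d), key: (B, heads, K, d), mask: (B, K) with 1 for real tokens.
    """
    scores = torch.matmul(query, key.transpose(-1, -2)) / math.sqrt(query.size(-1))
    if mask is not None:
        negative = (torch.finfo(scores.dtype).max * -0.7) * torch.ones_like(scores)
        scores = torch.where(mask[:, None, None, :].to(torch.bool), scores, negative)
    return torch.softmax(scores, dim=-1)


mask = torch.tensor([[1, 1, 1, 1, 1, 0, 0], [1, 1, 1, 1, 1, 1, 1]])
probs = masked_attention_probs(torch.randn(2, 4, 5, 8), torch.randn(2, 4, 7, 8), mask)
```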
max_number_cat_slots = cat_slot_emb.shape[1] + cat_slot_value_emb = self.cat_slot_value_emb(service_ids).view(batch_size, max_number_cat_slots, -1, emb_dim) + noncat_slot_emb = self.noncat_slot_emb(service_ids).view(batch_size, -1, emb_dim) + req_slot_emb = self.req_slot_emb(service_ids).view(batch_size, -1, emb_dim) + + logit_intent_status = self._get_intents( + encoded_utterance, intent_embeddings, intent_status_mask, token_embeddings, utterance_mask + ) + + logit_req_slot_status = self._get_requested_slots( + encoded_utterance, req_slot_emb, token_embeddings, utterance_mask + ) + + logit_cat_slot_status, logit_cat_slot_value = self._get_categorical_slot_goals( + encoded_utterance, cat_slot_emb, cat_slot_value_emb, cat_slot_values_mask, token_embeddings, utterance_mask + ) + + ( + logit_noncat_slot_status, + logit_noncat_slot_start, + logit_noncat_slot_end, + ) = self._get_noncategorical_slot_goals(encoded_utterance, utterance_mask, noncat_slot_emb, token_embeddings) + + return ( + logit_intent_status, + logit_req_slot_status, + logit_cat_slot_status, + logit_cat_slot_value, + logit_noncat_slot_status, + logit_noncat_slot_start, + logit_noncat_slot_end, + ) + + def _get_intents(self, encoded_utterance, intent_embeddings, intent_status_mask, token_embeddings, utterance_mask): + """ + Args: + intent_embedding - BERT schema embeddings + encoded_utterance - representation of untterance + intent_status_mask - masks out intent not used for the service + """ + batch_size = intent_embeddings.size()[0] + + # Add a trainable vector for the NONE intent. + repeated_none_intent_vector = self.none_intent_vector.repeat(batch_size, 1, 1) + intent_embeddings = torch.cat([repeated_none_intent_vector, intent_embeddings], axis=1) + logits = self.intent_layer( + encoded_utterance=encoded_utterance, + token_embeddings=token_embeddings, + element_embeddings=intent_embeddings, + utterance_mask=utterance_mask, + ) + logits = logits.squeeze(axis=-1) # Shape: (batch_size, max_intents + 1) + + # Mask out logits for padded intents + negative_logits = self._get_negative_logits(logits) + return torch.where(intent_status_mask.to(dtype=torch.bool), logits, negative_logits) + + def _get_requested_slots(self, encoded_utterance, requested_slot_emb, token_embeddings, utterance_mask): + """Obtain logits for requested slots.""" + + logits = self.requested_slots_layer( + encoded_utterance=encoded_utterance, + token_embeddings=token_embeddings, + element_embeddings=requested_slot_emb, + utterance_mask=utterance_mask, + ) + logits = logits.squeeze(axis=-1) + + # logits shape: (batch_size, max_num_slots) + logits = logits.squeeze(axis=-1) + return logits + + def _get_categorical_slot_goals( + self, + encoded_utterance, + cat_slot_emb, + cat_slot_value_emb, + cat_slot_values_mask, + token_embeddings, + utterance_mask, + ): + """ + Obtain logits for status and values for categorical slots + Slot status values: none, dontcare, active + """ + + # Predict the status of all categorical slots. + status_logits = self.cat_slot_status_layer( + encoded_utterance=encoded_utterance, + token_embeddings=token_embeddings, + element_embeddings=cat_slot_emb, + utterance_mask=utterance_mask, + ) + + # Predict the goal value. + # Shape: (batch_size, max_categorical_slots, max_categorical_values, embedding_dim). 
+ _, max_num_slots, max_num_values, embedding_dim = cat_slot_value_emb.size() + cat_slot_value_emb_reshaped = cat_slot_value_emb.view(-1, max_num_slots * max_num_values, embedding_dim) + + value_logits = self.cat_slot_value_layer( + encoded_utterance=encoded_utterance, + token_embeddings=token_embeddings, + element_embeddings=cat_slot_value_emb_reshaped, + utterance_mask=utterance_mask, + ) + + # Reshape to obtain the logits for all slots. + value_logits = value_logits.view(-1, max_num_slots, max_num_values) + + # Mask out logits for padded slots and values because they will be softmaxed + negative_value_logits = self._get_negative_logits(value_logits) + value_logits = torch.where(cat_slot_values_mask.to(dtype=torch.bool), value_logits, negative_value_logits) + return status_logits, value_logits + + def _get_noncategorical_slot_goals(self, encoded_utterance, utterance_mask, noncat_slot_emb, token_embeddings): + """ + Obtain logits for status and slot spans for non-categorical slots. + Slot status values: none, dontcare, active + """ + # Predict the status of all non-categorical slots. + max_num_slots = noncat_slot_emb.size()[1] + status_logits = self.noncat_slot_layer( + encoded_utterance=encoded_utterance, + token_embeddings=token_embeddings, + element_embeddings=noncat_slot_emb, + utterance_mask=utterance_mask, + ) + + # Predict the distribution for span indices. + max_num_tokens = token_embeddings.size()[1] + + repeated_token_embeddings = token_embeddings.unsqueeze(1).repeat(1, max_num_slots, 1, 1) + repeated_slot_embeddings = noncat_slot_emb.unsqueeze(2).repeat(1, 1, max_num_tokens, 1) + + # Shape: (batch_size, max_num_slots, max_num_tokens, 2 * embedding_dim). + slot_token_embeddings = torch.cat([repeated_slot_embeddings, repeated_token_embeddings], axis=3) + + # Project the combined embeddings to obtain logits, Shape: (batch_size, max_num_slots, max_num_tokens, 2) + span_logits = self.noncat_layer1(slot_token_embeddings) + span_logits = self.noncat_activation(span_logits) + span_logits = self.noncat_layer2(span_logits) + + # Mask out invalid logits for padded tokens. + utterance_mask = utterance_mask.to(bool) # Shape: (batch_size, max_num_tokens). + repeated_utterance_mask = utterance_mask.unsqueeze(1).unsqueeze(3).repeat(1, max_num_slots, 1, 2) + negative_logits = (torch.finfo(span_logits.dtype).max * -0.7) * torch.ones( + span_logits.size(), device=self._device, dtype=span_logits.dtype + ) + + span_logits = torch.where(repeated_utterance_mask, span_logits, negative_logits) + + # Shape of both tensors: (batch_size, max_num_slots, max_num_tokens). + span_start_logits, span_end_logits = torch.unbind(span_logits, dim=3) + return status_logits, span_start_logits, span_end_logits + + def _get_negative_logits(self, logits): + # returns tensor with negative logits that will be used to mask out unused values + # for a particular service + negative_logits = (torch.finfo(logits.dtype).max * -0.7) * torch.ones( + logits.size(), device=self._device, dtype=logits.dtype + ) + return negative_logits diff --git a/nemo/collections/nlp/nm/trainables/dialogue_state_tracking/sgd/sgd_encoder_nm.py b/nemo/collections/nlp/nm/trainables/dialogue_state_tracking/sgd/sgd_encoder_nm.py new file mode 100644 index 000000000000..13c1887f04b2 --- /dev/null +++ b/nemo/collections/nlp/nm/trainables/dialogue_state_tracking/sgd/sgd_encoder_nm.py @@ -0,0 +1,90 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. 
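`_get_noncategorical_slot_goals` above scores every (slot, token) pair for span starts and ends by concatenating the slot and token embeddings, projecting down to two logits and splitting them with `torch.unbind`. A condensed sketch of that tensor manipulation; the dimensions in the example are made up:

```python
import torch
import torch.nn.functional as F
from torch import nn


def span_logits(slot_emb, token_emb, proj1, proj2):
    """Start/end span scores for each (slot, token) pair (illustrative sketch).

    slot_emb:  (B, S, D) schema embeddings of the non-categorical slots
    token_emb: (B, T, D) BERT token embeddings of the utterance
    """
    B, S, D = slot_emb.shape
    T = token_emb.size(1)
    repeated_tokens = token_emb.unsqueeze(1).expand(B, S, T, D)
    repeated_slots = slot_emb.unsqueeze(2).expand(B, S, T, D)
    pairs = torch.cat([repeated_slots, repeated_tokens], dim=3)   # (B, S, T, 2D)
    logits = proj2(F.gelu(proj1(pairs)))                          # (B, S, T, 2)
    start_logits, end_logits = torch.unbind(logits, dim=3)        # each (B, S, T)
    return start_logits, end_logits


D = 16
proj1, proj2 = nn.Linear(2 * D, D), nn.Linear(D, 2)
starts, ends = span_logits(torch.randn(2, 3, D), torch.randn(2, 7, D), proj1, proj2)
assert starts.shape == ends.shape == (2, 3, 7)
```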
+# Copyright 2019 The Google Research Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +''' +This file contains code artifacts adapted from the original implementation: +https://github.com/google-research/google-research/blob/master/schema_guided_dst/baseline/train_and_predict.py +''' + +from torch import nn + +from nemo.backends.pytorch.nm import TrainableNM +from nemo.collections.nlp.utils.transformer_utils import transformer_weights_init +from nemo.core import ChannelType, EmbeddedTextType, NeuralType +from nemo.utils.decorators import add_port_docs + +__all__ = ['SGDEncoderNM'] + +ACTIVATIONS_F = { + "tanh": nn.Tanh, + "relu": nn.ReLU, +} + + +class SGDEncoderNM(TrainableNM): + """ + Neural module which extracts the first token from the BERT representation of the utterance + followed by a fully connected layer. + + Args: + hidden_size (int): hidden size of the BERT model + activation (str): activation function applied + dropout (float): dropout ratio + """ + + @property + @add_port_docs + def input_ports(self): + """ + Returns definitions of module input ports. + hidden_states (float): BERT representation of the utterance + """ + return {"hidden_states": NeuralType(('B', 'T', 'C'), ChannelType())} + + @property + @add_port_docs + def output_ports(self): + """Returns definitions of module output ports. 
+        logits (float): first-token ([CLS]) representation of the utterance after the fully connected layer and dropout
+        hidden_states (float): BERT representation of the utterance with dropout applied
+        """
+        return {
+            "logits": NeuralType(('B', 'T'), EmbeddedTextType()),
+            "hidden_states": NeuralType(('B', 'T', 'C'), ChannelType()),
+        }
+
+    def __init__(self, hidden_size, activation='tanh', dropout=0.0, use_transformer_pretrained=True):
+        super().__init__()
+        self.fc = nn.Linear(hidden_size, hidden_size).to(self._device)
+
+        if activation not in ACTIVATIONS_F:
+            raise ValueError(f'{activation} is not a supported activation; choose one of {list(ACTIVATIONS_F.keys())}')
+
+        self.activation = ACTIVATIONS_F[activation]()
+        self.dropout1 = nn.Dropout(dropout)
+        self.dropout2 = nn.Dropout(dropout)
+
+        if use_transformer_pretrained:
+            self.apply(lambda module: transformer_weights_init(module, xavier=False))
+        # self.to(self._device) # sometimes this is necessary
+
+    def forward(self, hidden_states):
+        first_token_hidden_states = hidden_states[:, 0]
+        logits = self.fc(first_token_hidden_states)
+        logits = self.activation(logits)
+        logits = self.dropout1(logits)
+        return logits, self.dropout2(hidden_states)
diff --git a/nemo/collections/nlp/nm/trainables/dialogue_state_tracking/state_tracking_trade_nm.py b/nemo/collections/nlp/nm/trainables/dialogue_state_tracking/trade_generator_nm.py
similarity index 87%
rename from nemo/collections/nlp/nm/trainables/dialogue_state_tracking/state_tracking_trade_nm.py
rename to nemo/collections/nlp/nm/trainables/dialogue_state_tracking/trade_generator_nm.py
index 1e047542e3ba..b753a19763f4 100644
--- a/nemo/collections/nlp/nm/trainables/dialogue_state_tracking/state_tracking_trade_nm.py
+++ b/nemo/collections/nlp/nm/trainables/dialogue_state_tracking/trade_generator_nm.py
@@ -46,55 +46,45 @@
 from nemo.backends.pytorch.nm import TrainableNM
 from nemo.core.neural_types import ChannelType, LabelsType, LengthsType, LogitsType, NeuralType
+from nemo.utils.decorators import add_port_docs
 
 __all__ = ['TRADEGenerator']
 
 
 class TRADEGenerator(TrainableNM):
+    """
+    The generator module of the TRADE dialogue state tracking model.
+    Args:
+        vocab (Vocab): an instance of Vocab containing the vocabulary
+        embeddings (Tensor): word embedding matrix
+        hid_size (int): hidden size of the GRU decoder
+        dropout (float): dropout of the GRU
+        slots (list): list of slots
+        nb_gate (int): number of gates
+        teacher_forcing (float): probability of using teacher forcing during training, e.g. 0.5
+    """
+
     @property
+    @add_port_docs()
     def input_ports(self):
         """Returns definitions of module input ports.
- - encoder_hidden: hidden states of the encoder - - encoder_outputs: outputs of the encoder - - input_lens: lengths of the input sequences to encoder - - src_ids: input sequences to encoder - - targets: targets for the output of the generator - """ return { - # 'encoder_hidden': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), - # 'encoder_outputs': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), - # 'input_lens': NeuralType({0: AxisType(BatchTag)}), - # 'src_ids': NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), - # 'targets': NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(TimeTag)}), 'encoder_hidden': NeuralType(('B', 'T', 'C'), ChannelType()), 'encoder_outputs': NeuralType(('B', 'T', 'C'), ChannelType()), 'input_lens': NeuralType(tuple('B'), LengthsType()), 'src_ids': NeuralType(('B', 'T'), ChannelType()), - # 'targets': NeuralType(ChannelType(), ('B', 'D', 'T')), 'targets': NeuralType(('B', 'D', 'T'), LabelsType()), } @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. point_outputs: outputs of the generator - gate_outputs: outputs of gating heads - """ - # return { - # 'point_outputs': NeuralType( - # {0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag), 3: AxisType(ChannelTag)} - # ), - # 'gate_outputs': NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag), 2: AxisType(ChannelTag)}), - # } return { 'point_outputs': NeuralType(('B', 'T', 'D', 'D'), LogitsType()), 'gate_outputs': NeuralType(('B', 'D', 'D'), LogitsType()), @@ -131,7 +121,6 @@ def _slots_split_to_index(self): self.subslot_idx = torch.tensor([self.slot_w2i[slot] for slot in slots], device=self._device) def forward(self, encoder_hidden, encoder_outputs, input_lens, src_ids, targets=None): - if (not self.training) or (random.random() > self.teacher_forcing): use_teacher_forcing = False else: @@ -153,7 +142,7 @@ def forward(self, encoder_hidden, encoder_outputs, input_lens, src_ids, targets= slot_emb = slot_emb.unsqueeze(1) slot_emb = slot_emb.repeat(1, batch_size, 1) decoder_input = self.dropout(slot_emb).view(-1, self.hidden_size) - hidden = encoder_hidden.transpose(0, 1).repeat(len(self.slots), 1, 1) + hidden = encoder_hidden[:, 0:1, :].transpose(0, 1).repeat(len(self.slots), 1, 1) hidden = hidden.view(-1, self.hidden_size).unsqueeze(0) diff --git a/nemo/collections/nlp/nm/trainables/joint_intent_slot/__init__.py b/nemo/collections/nlp/nm/trainables/joint_intent_slot/__init__.py index fdccbbbf6809..a5b44aa590d1 100644 --- a/nemo/collections/nlp/nm/trainables/joint_intent_slot/__init__.py +++ b/nemo/collections/nlp/nm/trainables/joint_intent_slot/__init__.py @@ -14,4 +14,4 @@ # limitations under the License. 
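`TRADEGenerator.forward` above draws a single coin flip per call to decide whether the decoder is fed the ground-truth tokens (teacher forcing) or its own previous predictions; at inference time teacher forcing is always off. The decision in isolation (a sketch, names are illustrative):

```python
import random


def decoder_inputs(training, teacher_forcing_p, target_tokens, predicted_tokens):
    """Pick the next decoder inputs under teacher forcing (illustrative sketch)."""
    use_teacher_forcing = training and random.random() < teacher_forcing_p
    return target_tokens if use_teacher_forcing else predicted_tokens
```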
# ============================================================================= -from nemo.collections.nlp.nm.trainables.joint_intent_slot.joint_intent_slot_nm import * +from nemo.collections.nlp.nm.trainables.joint_intent_slot.joint_intent_slot_classifier_nm import * diff --git a/nemo/collections/nlp/nm/trainables/joint_intent_slot/joint_intent_slot_nm.py b/nemo/collections/nlp/nm/trainables/joint_intent_slot/joint_intent_slot_classifier_nm.py similarity index 82% rename from nemo/collections/nlp/nm/trainables/joint_intent_slot/joint_intent_slot_nm.py rename to nemo/collections/nlp/nm/trainables/joint_intent_slot/joint_intent_slot_classifier_nm.py index c906417afd6d..461a25c902e6 100644 --- a/nemo/collections/nlp/nm/trainables/joint_intent_slot/joint_intent_slot_nm.py +++ b/nemo/collections/nlp/nm/trainables/joint_intent_slot/joint_intent_slot_classifier_nm.py @@ -17,8 +17,9 @@ from torch import nn as nn from nemo.backends.pytorch import MultiLayerPerceptron, TrainableNM -from nemo.collections.nlp.nm.trainables.common.transformer.transformer_utils import transformer_weights_init +from nemo.collections.nlp.utils.transformer_utils import transformer_weights_init from nemo.core import ChannelType, LogitsType, NeuralType +from nemo.utils.decorators import add_port_docs __all__ = ['JointIntentSlotClassifier'] @@ -34,34 +35,31 @@ class JointIntentSlotClassifier(TrainableNM): num_intents (int): number of intents num_slots (int): number of slots dropout (float): dropout to be applied to the layer + use_transformer_pretrained (bool): + TODO """ @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. + + hidden_states: + TODO """ - # return {"hidden_states": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)})} return {"hidden_states": NeuralType(('B', 'T', 'C'), ChannelType())} @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. intent_logits: - 0: AxisType(BatchTag) - - 1: AxisType(ChannelTag) - + TODO slot_logits: - 0: AxisType(BatchTag) - - 1: AxisType(TimeTag) - - 2: AxisType(ChannelTag) + TODO """ return { - # "intent_logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)}), - # "slot_logits": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag), 2: AxisType(ChannelTag)}), "intent_logits": NeuralType(('B', 'D'), LogitsType()), "slot_logits": NeuralType(('B', 'T', 'D'), LogitsType()), } @@ -82,7 +80,6 @@ def __init__(self, hidden_size, num_intents, num_slots, dropout=0.0, use_transfo ) if use_transformer_pretrained: self.apply(lambda module: transformer_weights_init(module, xavier=False)) - # self.to(self._device) def forward(self, hidden_states): hidden_states = self.dropout(hidden_states) diff --git a/nemo/collections/nlp/utils/__init__.py b/nemo/collections/nlp/utils/__init__.py index 9a0f97ecdc63..cf26efc88753 100644 --- a/nemo/collections/nlp/utils/__init__.py +++ b/nemo/collections/nlp/utils/__init__.py @@ -1,4 +1,20 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
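`JointIntentSlotClassifier` above exposes two heads over the same hidden states: a sequence-level intent head with shape (B, num_intents) and a token-level slot head with shape (B, T, num_slots). A rough stand-in using plain linear layers; pooling the intent from the first token is an assumption made for illustration, and the real module uses NeMo's `MultiLayerPerceptron`:

```python
import torch
from torch import nn


class JointIntentSlotHead(nn.Module):
    """Two-headed classifier sketch: one intent per sequence, one slot tag per token."""

    def __init__(self, hidden_size, num_intents, num_slots, dropout=0.0):
        super().__init__()
        self.dropout = nn.Dropout(dropout)
        self.intent_head = nn.Linear(hidden_size, num_intents)
        self.slot_head = nn.Linear(hidden_size, num_slots)

    def forward(self, hidden_states):                            # (B, T, H)
        hidden_states = self.dropout(hidden_states)
        intent_logits = self.intent_head(hidden_states[:, 0])    # (B, num_intents), assumed first-token pooling
        slot_logits = self.slot_head(hidden_states)               # (B, T, num_slots)
        return intent_logits, slot_logits


intent_logits, slot_logits = JointIntentSlotHead(16, num_intents=3, num_slots=7)(torch.randn(2, 9, 16))
```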
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + from nemo.collections.nlp.utils.callback_utils import * -from nemo.collections.nlp.utils.common_nlp_utils import * -from nemo.collections.nlp.utils.huggingface_utils import * -from nemo.collections.nlp.utils.loss_utils import * +from nemo.collections.nlp.utils.data_utils import * +from nemo.collections.nlp.utils.functional_utils import * +from nemo.collections.nlp.utils.transformer_utils import * diff --git a/nemo/collections/nlp/utils/callback_utils.py b/nemo/collections/nlp/utils/callback_utils.py index 4d03b018185f..a6dc86c1e6f9 100644 --- a/nemo/collections/nlp/utils/callback_utils.py +++ b/nemo/collections/nlp/utils/callback_utils.py @@ -19,11 +19,11 @@ import numpy as np from matplotlib import pyplot as plt -from sklearn.metrics import confusion_matrix +from sklearn.metrics import classification_report, confusion_matrix, f1_score from nemo import logging -__all__ = ['list2str', 'tensor2list', 'plot_confusion_matrix'] +__all__ = ['list2str', 'tensor2list', 'plot_confusion_matrix', 'tensor2numpy'] def list2str(l): @@ -34,6 +34,10 @@ def tensor2list(tensor): return tensor.detach().cpu().tolist() +def tensor2numpy(tensor): + return tensor.detach().cpu().numpy() + + def plot_confusion_matrix(labels, preds, graph_fold, label_ids=None, normalize=False, prefix=''): ''' Plot confusion matrix. @@ -95,3 +99,33 @@ def _plot_confusion_matrix(labels, preds, graph_fold): plt.ylabel('True') os.makedirs(graph_fold, exist_ok=True) plt.savefig(os.path.join(graph_fold, time.strftime('%Y%m%d-%H%M%S'))) + + +def get_classification_report(labels, preds, label_ids): + """ + Returns classification report + """ + # remove labels from label_ids that don't appear in predictions or ground truths + used_labels = set(labels) | set(preds) + labels_names = [ + k + ' (label id: ' + str(v) + ')' + for k, v in sorted(label_ids.items(), key=lambda item: item[1]) + if v in used_labels + ] + + return classification_report(labels, preds, target_names=labels_names, digits=4) + + +def get_f1_scores(labels, preds, average_modes=['binary', 'weighted', 'macro', 'micro']): + """ + Returns a dictionary with f1_score based on different averaging mode + Args: + labels (list of ints): list of true labels + preds (list of ints): list of predicted labels + average_modes (list): list of possible averaging types. Binary for is supported only for binary target. + """ + f1_scores = {} + for average in average_modes: + f1_scores['F1 ' + average] = round(f1_score(labels, preds, average=average) * 100, 2) + + return f1_scores diff --git a/nemo/collections/nlp/utils/common_nlp_utils.py b/nemo/collections/nlp/utils/common_nlp_utils.py deleted file mode 100644 index cb6737bac97e..000000000000 --- a/nemo/collections/nlp/utils/common_nlp_utils.py +++ /dev/null @@ -1,144 +0,0 @@ -# ============================================================================= -# Copyright 2020 NVIDIA. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
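`get_f1_scores` above is a thin wrapper around scikit-learn's `f1_score`; the equivalent computation by hand, on a toy binary example:

```python
from sklearn.metrics import f1_score

labels = [0, 1, 1, 0, 1]
preds = [0, 1, 0, 0, 1]

# One F1 value per averaging mode, scaled to a percentage and rounded to two decimals,
# matching what get_f1_scores returns ('binary' only makes sense for binary targets).
scores = {
    'F1 ' + average: round(f1_score(labels, preds, average=average) * 100, 2)
    for average in ['binary', 'weighted', 'macro', 'micro']
}
print(scores)  # {'F1 binary': 80.0, ...}
```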
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================= - -import os -import re -import string - -import numpy as np - -from nemo import logging - -__all__ = [ - '_is_whitespace', - 'mask_padded_tokens', - 'read_intent_slot_outputs', - 'get_vocab', - 'write_vocab', - 'label2idx', - 'write_vocab_in_order', - 'if_exist', - 'remove_punctuation_from_sentence', - 'ids2text', - 'calc_class_weights', -] - - -def _is_whitespace(c): - if c == " " or c == "\t" or c == "\r" or c == "\n" or ord(c) == 0x202F: - return True - return False - - -def mask_padded_tokens(tokens, pad_id): - mask = tokens != pad_id - return mask - - -def read_intent_slot_outputs( - queries, intent_file, slot_file, intent_logits, slot_logits, slot_masks, intents=None, slots=None -): - intent_dict = get_vocab(intent_file) - slot_dict = get_vocab(slot_file) - pred_intents = np.argmax(intent_logits, 1) - pred_slots = np.argmax(slot_logits, axis=2) - slot_masks = slot_masks > 0.5 - for i, query in enumerate(queries): - logging.info(f'Query: {query}') - pred = pred_intents[i] - logging.info(f'Predicted intent:\t{pred}\t{intent_dict[pred]}') - if intents is not None: - logging.info(f'True intent:\t{intents[i]}\t{intent_dict[intents[i]]}') - - pred_slot = pred_slots[i][slot_masks[i]] - tokens = query.strip().split() - - if len(pred_slot) != len(tokens): - raise ValueError('Pred_slot and tokens must be of the same length') - - for j, token in enumerate(tokens): - output = f'{token}\t{slot_dict[pred_slot[j]]}' - if slots is not None: - output = f'{output}\t{slot_dict[slots[i][j]]}' - logging.info(output) - - -def get_vocab(file): - lines = open(file, 'r').readlines() - lines = [line.strip() for line in lines if line.strip()] - labels = {i: lines[i] for i in range(len(lines))} - return labels - - -def write_vocab(items, outfile): - vocab = {} - idx = 0 - with open(outfile, 'w') as f: - for item in items: - f.write(item + '\n') - vocab[item] = idx - idx += 1 - return vocab - - -def label2idx(file): - lines = open(file, 'r').readlines() - lines = [line.strip() for line in lines if line.strip()] - labels = {lines[i]: i for i in range(len(lines))} - return labels - - -def write_vocab_in_order(vocab, outfile): - with open(outfile, 'w') as f: - for key in sorted(vocab.keys()): - f.write(f'{vocab[key]}\n') - - -def if_exist(outfold, files): - if not os.path.exists(outfold): - return False - for file in files: - if not os.path.exists(f'{outfold}/{file}'): - return False - return True - - -def remove_punctuation_from_sentence(sentence): - sentence = re.sub('[' + string.punctuation + ']', '', sentence) - sentence = sentence.lower() - return sentence - - -def ids2text(ids, vocab): - return ' '.join([vocab[int(id_)] for id_ in ids]) - - -def calc_class_weights(label_freq): - """ - Goal is to give more weight to the classes with less samples - so as to match the one with the higest frequency. We achieve this by - dividing the highest frequency by the freq of each label. - Example - - [12, 5, 3] -> [12/12, 12/5, 12/3] -> [1, 2.4, 4] - - Here label_freq is assumed to be sorted by the frequency. I.e. 
- label_freq[0] is the most frequent element. - - """ - - most_common_label_freq = label_freq[0] - weighted_slots = sorted([(index, most_common_label_freq[1] / freq) for (index, freq) in label_freq]) - return [weight for (_, weight) in weighted_slots] diff --git a/nemo/collections/nlp/utils/data_utils.py b/nemo/collections/nlp/utils/data_utils.py new file mode 100644 index 000000000000..1c3dcf5f0db8 --- /dev/null +++ b/nemo/collections/nlp/utils/data_utils.py @@ -0,0 +1,66 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +import re +import string + +import numpy as np + +__all__ = ['get_vocab', 'get_tokens', 'normalize_answer', 'mask_padded_tokens', 'concatenate'] + + +def get_vocab(file): + lines = open(file, 'r').readlines() + lines = [line.strip() for line in lines if line.strip()] + labels = {i: lines[i] for i in range(len(lines))} + return labels + + +def normalize_answer(s): + """Lower text and remove punctuation, articles and extra whitespace.""" + + def remove_articles(text): + return re.sub(r'\b(a|an|the)\b', ' ', text) + + def white_space_fix(text): + return ' '.join(text.split()) + + def remove_punc(text): + exclude = set(string.punctuation) + return ''.join(ch for ch in text if ch not in exclude) + + def lower(text): + return text.lower() + + return white_space_fix(remove_articles(remove_punc(lower(s)))) + + +def get_tokens(s): + if not s: + return [] + return normalize_answer(s).split() + + +def mask_padded_tokens(tokens, pad_id): + mask = tokens != pad_id + return mask + + +def concatenate(lists): + """ + Helper function for inference + """ + return np.concatenate([t.cpu() for t in lists]) diff --git a/nemo/collections/nlp/utils/evaluation_utils.py b/nemo/collections/nlp/utils/evaluation_utils.py new file mode 100644 index 000000000000..0c960c32123f --- /dev/null +++ b/nemo/collections/nlp/utils/evaluation_utils.py @@ -0,0 +1,151 @@ +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +import numpy as np + +from nemo import logging + + +def analyze_confusion_matrix(cm, dict, max_pairs=10): + """ + Sort all confusions in the confusion matrix by value and display results. 
+ Print results in a format: (name -> name, value) + Args: + cm: Confusion matrix + dict: Dictionary with key as a name and index as a value (Intents or Slots) + max_pairs: Max number of confusions to print + """ + threshold = 5 # just arbitrary value to take confusion with at least this number + confused_pairs = {} + size = cm.shape[0] + for i in range(size): + res = cm[i].argsort() + for j in range(size): + pos = res[size - j - 1] + # no confusion - same row and column + if pos == i: + continue + elif cm[i][pos] >= threshold: + str = f'{dict[i]} -> {dict[pos]}' + confused_pairs[str] = cm[i][pos] + else: + break + + # sort by max confusions and print first max_pairs + sorted_confused_pairs = sorted(confused_pairs.items(), key=lambda x: x[1], reverse=True) + for i, pair_str in enumerate(sorted_confused_pairs): + if i >= max_pairs: + break + logging.info(pair_str) + + +def errors_per_class(cm, dict): + """ + Summarize confusions per each class in the confusion matrix. + It can be useful both for Intents and Slots. + It counts each confusion twice in both directions. + Args: + cm: Confusion matrix + dict: Dictionary with key as a name and index as a value (Intents or Slots) + """ + size = cm.shape[0] + confused_per_class = {} + total_errors = 0 + for class_num in range(size): + sum = 0 + for i in range(size): + if i != class_num: + sum += cm[class_num][i] + sum += cm[i][class_num] + confused_per_class[dict[class_num]] = sum + total_errors += sum + # logging.info(f'{dict[class_num]} - {sum}') + + logging.info(f'Total errors (multiplied by 2): {total_errors}') + sorted_confused_per_class = sorted(confused_per_class.items(), key=lambda x: x[1], reverse=True) + for conf_str in sorted_confused_per_class: + logging.info(conf_str) + + +def log_misclassified_queries(intent_labels, intent_preds, queries, intent_dict, limit=50): + """ + Display examples of Intent mistakes. + In a format: Query, predicted and labeled intent names. + """ + logging.info(f'*** Misclassified intent queries (limit {limit}) ***') + cnt = 0 + for i in range(len(intent_preds)): + if intent_labels[i] != intent_preds[i]: + query = queries[i].split('\t')[0] + logging.info( + f'{query} (predicted: {intent_dict[intent_preds[i]]} - labeled: {intent_dict[intent_labels[i]]})' + ) + cnt = cnt + 1 + if cnt >= limit: + break + + +def log_misclassified_slots( + intent_labels, intent_preds, slot_labels, slot_preds, subtokens_mask, queries, intent_dict, slot_dict, limit=50 +): + """ + Display examples of Slot mistakes. + In a format: Query, predicted and labeled intent names and list of predicted and labeled slot numbers. + also prints dictionary of the slots at the start for easier reading. 
+ """ + logging.info('') + logging.info(f'*** Misclassified slots queries (limit {limit}) ***') + # print slot dictionary + logging.info(f'Slot dictionary:') + str = '' + for i, slot in enumerate(slot_dict): + str += f'{i} - {slot}, ' + if i % 5 == 4 or i == len(slot_dict) - 1: + logging.info(str) + str = '' + + logging.info('----------------') + cnt = 0 + for i in range(len(intent_preds)): + cur_slot_pred = slot_preds[i][subtokens_mask[i]] + cur_slot_label = slot_labels[i][subtokens_mask[i]] + if not np.all(cur_slot_pred == cur_slot_label): + query = queries[i].split('\t')[0] + logging.info( + f'{query} (predicted: {intent_dict[intent_preds[i]]} - labeled: {intent_dict[intent_labels[i]]})' + ) + logging.info(f'p: {cur_slot_pred}') + logging.info(f'l: {cur_slot_label}') + cnt = cnt + 1 + if cnt >= limit: + break + + +def check_problematic_slots(slot_preds_list, slot_dict): + """ Check non compliance of B- and I- slots for datasets that use such slot encoding. """ + cnt = 0 + + # for sentence in slot_preds: + # slots = sentence.split(" ") + sentence = slot_preds_list + for i in range(len(sentence)): + slot_name = slot_dict[int(sentence[i])] + if slot_name.startswith("I-"): + prev_slot_name = slot_dict[int(sentence[i - 1])] + if slot_name[2:] != prev_slot_name[2:]: + print("Problem: " + slot_name + " - " + prev_slot_name) + cnt += 1 + print("Total problematic slots: " + str(cnt)) diff --git a/nemo/collections/nlp/utils/loss_utils.py b/nemo/collections/nlp/utils/functional_utils.py similarity index 90% rename from nemo/collections/nlp/utils/loss_utils.py rename to nemo/collections/nlp/utils/functional_utils.py index a4d3da6ef10f..ff36534bacba 100644 --- a/nemo/collections/nlp/utils/loss_utils.py +++ b/nemo/collections/nlp/utils/functional_utils.py @@ -16,7 +16,9 @@ import math -__all__ = ['_compute_softmax'] +import torch + +__all__ = ['_compute_softmax', 'gelu'] def _compute_softmax(scores): @@ -40,3 +42,7 @@ def _compute_softmax(scores): for score in exp_scores: probs.append(score / total_sum) return probs + + +def gelu(x): + return x * 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0))) diff --git a/nemo/collections/nlp/utils/huggingface_utils.py b/nemo/collections/nlp/utils/huggingface_utils.py deleted file mode 100644 index 98f3df9c36b7..000000000000 --- a/nemo/collections/nlp/utils/huggingface_utils.py +++ /dev/null @@ -1,54 +0,0 @@ -# ============================================================================= -# Copyright 2020 NVIDIA. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================= - -__all__ = ['MODEL_SPECIAL_TOKENS', 'MODEL_NAMES'] - -MODEL_SPECIAL_TOKENS = { - "bert": { - "unk_token": "[UNK]", - "sep_token": "[SEP]", - "pad_token": "[PAD]", - "bos_token": "[CLS]", - "mask_token": "[MASK]", - "eos_token": "[SEP]", - "cls_token": "[CLS]", - }, - "roberta": { - "unk_token": "", - "sep_token": "", - "pad_token": "", - "bos_token": "", - "mask_token": "", - "eos_token": "", - "cls_token": "", - }, - "albert": { - "unk_token": "", - "sep_token": "[SEP]", - "eos_token": "[SEP]", - "pad_token": "", - "cls_token": "[CLS]", - "bos_token": "[CLS]", - "mask_token": "[MASK]", - }, -} - - -MODEL_NAMES = { - "bert": {"model_name": "bert-base-uncased", "tokenizer_name": "bert-base-uncased",}, - "roberta": {"model_name": "roberta-base", "tokenizer_name": "roberta-base",}, - "albert": {"model_name": "albert-base-v2", "tokenizer_name": "albert-base-v2",}, -} diff --git a/nemo/collections/nlp/nm/trainables/common/transformer/transformer_utils.py b/nemo/collections/nlp/utils/transformer_utils.py similarity index 73% rename from nemo/collections/nlp/nm/trainables/common/transformer/transformer_utils.py rename to nemo/collections/nlp/utils/transformer_utils.py index 4f3f80ec670a..4c8742098182 100644 --- a/nemo/collections/nlp/nm/trainables/common/transformer/transformer_utils.py +++ b/nemo/collections/nlp/utils/transformer_utils.py @@ -1,13 +1,25 @@ -import math +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= import torch import torch.nn as nn -NEG_INF = -10000.0 - +__all__ = ['form_attention_mask', 'transformer_weights_init'] -def gelu(x): - return x * 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0))) +NEG_INF = -10000.0 def form_attention_mask(input_mask, diagonal=None): diff --git a/nemo/collections/simple_gan/gan.py b/nemo/collections/simple_gan/gan.py index b0d39a406d64..d441a45e53ae 100644 --- a/nemo/collections/simple_gan/gan.py +++ b/nemo/collections/simple_gan/gan.py @@ -7,6 +7,7 @@ from nemo.backends.pytorch.nm import DataLayerNM, LossNM, TrainableNM from nemo.core import DeviceType from nemo.core.neural_types import ChannelType, LabelsType, LossType, NeuralType +from nemo.utils.decorators import add_port_docs class SimpleDiscriminator(TrainableNM): @@ -15,6 +16,7 @@ class SimpleDiscriminator(TrainableNM): """ @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. """ @@ -31,6 +33,7 @@ def input_ports(self): } @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. """ @@ -65,6 +68,7 @@ class SimpleGenerator(TrainableNM): """ @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. 
""" @@ -81,6 +85,7 @@ def input_ports(self): } @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. """ @@ -128,6 +133,7 @@ class DiscriminatorLoss(LossNM): """ @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. @@ -142,6 +148,7 @@ def input_ports(self): } @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. """ @@ -168,6 +175,7 @@ class GradientPenalty(LossNM): """ @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. """ @@ -186,6 +194,7 @@ def input_ports(self): } @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. @@ -225,6 +234,7 @@ class InterpolateImage(TrainableNM): """ @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. """ @@ -250,6 +260,7 @@ def input_ports(self): } @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. """ @@ -286,6 +297,7 @@ class RandomDataLayer(DataLayerNM): """ @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. @@ -351,6 +363,7 @@ class MnistGanDataLayer(DataLayerNM): """ @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. """ diff --git a/nemo/collections/tts/__init__.py b/nemo/collections/tts/__init__.py index d0b4fbfedca3..548149ecb6bf 100644 --- a/nemo/collections/tts/__init__.py +++ b/nemo/collections/tts/__init__.py @@ -14,6 +14,8 @@ # ============================================================================= from nemo.collections.tts.data_layers import AudioDataLayer +from nemo.collections.tts.fastspeech_modules import * +from nemo.collections.tts.fastspeech_modules import __all__ as fastspeech__all__ from nemo.collections.tts.parts.helpers import * from nemo.collections.tts.parts.helpers import __all__ as helpers__all__ from nemo.collections.tts.tacotron2_modules import * @@ -24,4 +26,4 @@ backend = Backend.PyTorch -__all__ = ["AudioDataLayer"] + helpers__all__ + tacotron2__all__ + waveglow__all__ +__all__ = ["AudioDataLayer"] + helpers__all__ + tacotron2__all__ + waveglow__all__ + fastspeech__all__ diff --git a/nemo/collections/tts/data_layers.py b/nemo/collections/tts/data_layers.py index ffebe99e3df9..6d29b4504cc9 100644 --- a/nemo/collections/tts/data_layers.py +++ b/nemo/collections/tts/data_layers.py @@ -1,11 +1,12 @@ # Copyright (c) 2019 NVIDIA Corporation import torch -import nemo from .parts.datasets import AudioOnlyDataset from nemo.backends.pytorch.nm import DataLayerNM from nemo.core import DeviceType from nemo.core.neural_types import AudioSignal, LengthsType, NeuralType +from nemo.utils import logging +from nemo.utils.decorators import add_port_docs class AudioDataLayer(DataLayerNM): @@ -46,13 +47,14 @@ class AudioDataLayer(DataLayerNM): """ @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. 
""" return { # "audio_signal": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # "a_sig_length": NeuralType({0: AxisType(BatchTag)}), - "audio_signal": NeuralType(('B', 'T'), AudioSignal()), + "audio_signal": NeuralType(('B', 'T'), AudioSignal(freq=self.sample_rate)), "a_sig_length": NeuralType(tuple('B'), LengthsType()), } @@ -60,6 +62,7 @@ def __init__( self, manifest_filepath, batch_size, + sample_rate, min_duration=0.1, max_duration=None, trim_silence=False, @@ -69,6 +72,7 @@ def __init__( n_segments=0, ): super().__init__() + self.sample_rate = sample_rate self._dataset = AudioOnlyDataset( manifest_filepath=manifest_filepath, @@ -80,7 +84,7 @@ def __init__( sampler = None if self._placement == DeviceType.AllGpu: - nemo.logging.info('Parallelizing DATALAYER') + logging.info('Parallelizing DATALAYER') sampler = torch.utils.data.distributed.DistributedSampler(self._dataset) self._dataloader = torch.utils.data.DataLoader( diff --git a/nemo/collections/tts/fastspeech_modules.py b/nemo/collections/tts/fastspeech_modules.py new file mode 100644 index 000000000000..bda37858f9a9 --- /dev/null +++ b/nemo/collections/tts/fastspeech_modules.py @@ -0,0 +1,373 @@ +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Optional + +import torch +from torch import nn +from torch.nn import functional as F + +import nemo +from nemo.backends.pytorch import nm as nemo_nm +from nemo.backends.pytorch.nm import DataLayerNM, LossNM +from nemo.collections.asr.parts import AudioDataset, WaveformFeaturizer +from nemo.collections.tts.parts import fastspeech, fastspeech_transformer +from nemo.core.neural_types import AudioSignal, EmbeddedTextType, LengthsType, MaskType, MelSpectrogramType, NeuralType +from nemo.utils.decorators import add_port_docs + +__all__ = ['FastSpeechDataLayer', 'FastSpeech', 'FastSpeechLoss'] + + +class FastSpeechDataLayer(DataLayerNM): + """Data Layer for Fast Speech model. + + Basically, replicated behavior from AudioToText Data Layer, zipped with ground truth durations for additional loss. + + Args: + manifest_filepath (str): Dataset parameter. + Path to JSON containing data. + durs_dir (str): Path to durations arrays directory. + labels (list): Dataset parameter. + List of characters that can be output by the ASR model. + For Jasper, this is the 28 character set {a-z '}. The CTC blank + symbol is automatically added later for models using ctc. + batch_size (int): batch size + sample_rate (int): Target sampling rate for data. Audio files will be + resampled to sample_rate if it is not already. + Defaults to 16000. + int_values (bool): Bool indicating whether the audio file is saved as + int data or float data. + Defaults to False. + eos_id (id): Dataset parameter. + End of string symbol id used for seq2seq models. + Defaults to None. + min_duration (float): Dataset parameter. + All training files which have a duration less than min_duration + are dropped. Note: Duration is read from the manifest JSON. + Defaults to 0.1. 
+ max_duration (float): Dataset parameter. + All training files which have a duration more than max_duration + are dropped. Note: Duration is read from the manifest JSON. + Defaults to None. + normalize_transcripts (bool): Dataset parameter. + Whether to use automatic text cleaning. + It is highly recommended to manually clean text for best results. + Defaults to True. + trim_silence (bool): Whether to use trim silence from beginning and end + of audio signal using librosa.effects.trim(). + Defaults to False. + load_audio (bool): Dataset parameter. + Controls whether the dataloader loads the audio signal and + transcript or just the transcript. + Defaults to True. + drop_last (bool): See PyTorch DataLoader. + Defaults to False. + shuffle (bool): See PyTorch DataLoader. + Defaults to True. + num_workers (int): See PyTorch DataLoader. + Defaults to 0. + perturb_config (dict): Currently disabled. + + """ + + @property + @add_port_docs + def output_ports(self): + """Returns definitions of module output ports.""" + return dict( + audio=NeuralType(('B', 'T'), AudioSignal(freq=self.sample_rate)), + audio_len=NeuralType(tuple('B'), LengthsType()), + text=NeuralType(('B', 'T'), EmbeddedTextType()), + text_pos=NeuralType(('B', 'T'), MaskType()), + dur_true=NeuralType(('B', 'T'), LengthsType()), + ) + + def __init__( + self, + manifest_filepath, + durs_dir, + labels, + batch_size, + sample_rate=16000, + int_values=False, + bos_id=None, + eos_id=None, + pad_id=None, + min_duration=0.1, + max_duration=None, + normalize_transcripts=True, + trim_silence=False, + load_audio=True, + drop_last=False, + shuffle=True, + num_workers=0, + ): + super().__init__() + + # Set up dataset. + self._featurizer = WaveformFeaturizer(sample_rate=sample_rate, int_values=int_values, augmentor=None) + dataset_params = { + 'manifest_filepath': manifest_filepath, + 'labels': labels, + 'featurizer': self._featurizer, + 'max_duration': max_duration, + 'min_duration': min_duration, + 'normalize': normalize_transcripts, + 'trim': trim_silence, + 'bos_id': bos_id, + 'eos_id': eos_id, + 'load_audio': load_audio, + } + audio_dataset = AudioDataset(**dataset_params) + self._dataset = fastspeech.FastSpeechDataset(audio_dataset, durs_dir) + self._pad_id = pad_id + self.sample_rate = sample_rate + + sampler = None + if self._placement == nemo.core.DeviceType.AllGpu: + sampler = torch.utils.data.distributed.DistributedSampler(self._dataset) + + self._dataloader = torch.utils.data.DataLoader( + dataset=self._dataset, + batch_size=batch_size, + collate_fn=self._collate, + drop_last=drop_last, + shuffle=shuffle if sampler is None else False, + sampler=sampler, + num_workers=num_workers, + ) + + def _collate(self, batch): + def merge(tensors, value=0.0, dtype=torch.float): + max_len = max(tensor.shape[0] for tensor in tensors) + new_tensors = [] + for tensor in tensors: + pad = (2 * len(tensor.shape)) * [0] + pad[-1] = max_len - tensor.shape[0] + new_tensors.append(F.pad(tensor, pad=pad, value=value)) + return torch.stack(new_tensors).to(dtype=dtype) + + def make_pos(lengths): + return merge([torch.arange(length) + 1 for length in lengths], value=0, dtype=torch.int64) + + batch = {key: [example[key] for example in batch] for key in batch[0]} + + audio = merge(batch['audio']) + audio_len = torch.tensor(batch['audio_len']) + text = merge(batch['text'], value=self._pad_id or 0, dtype=torch.long) + text_pos = make_pos(batch.pop('text_len')) + dur_true = merge(batch['dur_true']) + + assert text.shape == text_pos.shape + assert text.shape == 
dur_true.shape + + return audio, audio_len, text, text_pos, dur_true + + def __len__(self) -> int: + return len(self._dataset) + + @property + def dataset(self) -> Optional[torch.utils.data.Dataset]: + return None + + @property + def data_iterator(self) -> Optional[torch.utils.data.DataLoader]: + return self._dataloader + + +class FastSpeech(nemo_nm.TrainableNM): + """FastSpeech Model. + + Attributes: + decoder_output_size: Output size for decoder. + n_mels: Number of features for mel spectrogram. + max_seq_len: Maximum length of input sequence. + word_vec_dim: Dimensionality of word embedding vector. + encoder_n_layer: Number of layers for encoder. + encoder_head: Number of heads for encoder. + encoder_conv1d_filter_size: Filter size for encoder convolutions. + decoder_n_layer: Number of layers for decoder. + decoder_head: Number of heads for decoder. + decoder_conv1d_filter_size: Filter size for decoder convolutions. + fft_conv1d_kernel: Kernel size for FFT. + fft_conv1d_padding: Padding for FFT. + encoder_output_size: Output size for encoder. + duration_predictor_filter_size: Predictor filter size. + duration_predictor_kernel_size: Predictor kernel size. + dropout: Dropout probability. + Alpha: Predictor loss coeficient. + + """ + + @property + @add_port_docs + def input_ports(self): + """Returns definitions of module input ports.""" + return dict( + text=NeuralType(('B', 'T'), EmbeddedTextType()), + text_pos=NeuralType(('B', 'T'), MaskType()), + mel_true=NeuralType(('B', 'D', 'T'), MelSpectrogramType()), + dur_true=NeuralType(('B', 'T'), LengthsType()), + ) + + @property + @add_port_docs + def output_ports(self): + """Returns definitions of module output ports.""" + return dict( + mel_pred=NeuralType(('B', 'D', 'T'), MelSpectrogramType()), dur_pred=NeuralType(('B', 'T'), LengthsType()), + ) + + def __init__( + self, + decoder_output_size: int, + n_mels: int, + max_seq_len: int, + word_vec_dim: int, + encoder_n_layer: int, + encoder_head: int, + encoder_conv1d_filter_size: int, + decoder_n_layer: int, + decoder_head: int, + decoder_conv1d_filter_size: int, + fft_conv1d_kernel: int, + fft_conv1d_padding: int, + encoder_output_size: int, + duration_predictor_filter_size: int, + duration_predictor_kernel_size: int, + dropout: float, + alpha: float, + n_src_vocab: int, + pad_id: int, + ): + super().__init__() + + self.encoder = fastspeech_transformer.FastSpeechTransformerEncoder( + len_max_seq=max_seq_len, + d_word_vec=word_vec_dim, + n_layers=encoder_n_layer, + n_head=encoder_head, + d_k=64, + d_v=64, + d_model=word_vec_dim, + d_inner=encoder_conv1d_filter_size, + fft_conv1d_kernel=fft_conv1d_kernel, + fft_conv1d_padding=fft_conv1d_padding, + dropout=dropout, + n_src_vocab=n_src_vocab, + pad_id=pad_id, + ).to(self._device) + self.length_regulator = fastspeech.LengthRegulator( + encoder_output_size, duration_predictor_filter_size, duration_predictor_kernel_size, dropout + ).to(self._device) + + self.decoder = fastspeech_transformer.FastSpeechTransformerDecoder( + len_max_seq=max_seq_len, + d_word_vec=word_vec_dim, + n_layers=decoder_n_layer, + n_head=decoder_head, + d_k=64, + d_v=64, + d_model=word_vec_dim, + d_inner=decoder_conv1d_filter_size, + fft_conv1d_kernel=fft_conv1d_kernel, + fft_conv1d_padding=fft_conv1d_padding, + dropout=dropout, + pad_id=pad_id, + ).to(self._device) + + self.mel_linear = nn.Linear(decoder_output_size, n_mels, bias=True).to(self._device) + self.alpha = alpha + + def forward(self, text, text_pos, mel_true=None, dur_true=None): + encoder_output, encoder_mask = 
self.encoder(text, text_pos) + + if self.training: + mel_max_length = mel_true.shape[2] + length_regulator_output, decoder_pos, dur_pred = self.length_regulator( + encoder_output, encoder_mask, dur_true, self.alpha, mel_max_length + ) + + assert length_regulator_output.shape[1] <= mel_max_length + + else: + length_regulator_output, decoder_pos, dur_pred = self.length_regulator( + encoder_output, encoder_mask, alpha=self.alpha + ) + + decoder_output, decoder_mask = self.decoder(length_regulator_output, decoder_pos) + mel_pred = self.mel_linear(decoder_output).transpose(1, 2) + + assert mel_pred.shape[2] == dur_true.sum(-1).max() + assert mel_true.shape[2] == dur_true.sum(-1).max() + + return mel_pred, dur_pred + + +class FastSpeechLoss(LossNM): + """Neural Module Wrapper for Fast Speech Loss. + + Calculates final loss as sum of two: MSE for mel spectrograms and MSE for durations. + + """ + + @property + @add_port_docs + def input_ports(self): + """Returns definitions of module input ports.""" + return dict( + mel_true=NeuralType(('B', 'D', 'T'), MelSpectrogramType()), + mel_pred=NeuralType(('B', 'D', 'T'), MelSpectrogramType()), + dur_true=NeuralType(('B', 'T'), LengthsType()), + dur_pred=NeuralType(('B', 'T'), LengthsType()), + text_pos=NeuralType(('B', 'T'), MaskType()), + ) + + @property + @add_port_docs + def output_ports(self): + """Returns definitions of module output ports.""" + return dict(loss=NeuralType(None)) + + def _loss_function(self, **kwargs): + return self._loss(*(kwargs.values())) + + @staticmethod + def _loss( + mel_true, mel_pred, dur_true, dur_pred, text_pos, + ): + """Do the actual math in FastSpeech loss calculation. + + Args: + mel_true: Ground truth mel spectrogram features (BTC, float). + mel_pred: Predicted mel spectrogram features (BTC, float). + dur_true: Ground truth durations (BQ, float). + dur_pred: Predicted log-normalized durations (BQ, float). + + Returns: + Single 0-dim loss tensor. + + """ + + mel_loss = F.mse_loss(mel_pred, mel_true, reduction='none') + mel_loss *= mel_true.ne(0).float() + mel_loss = mel_loss.mean() + + dur_loss = F.mse_loss(dur_pred, (dur_true + 1).log(), reduction='none') + dur_loss *= text_pos.ne(0).float() + dur_loss = dur_loss.mean() + + loss = mel_loss + dur_loss + + return loss diff --git a/nemo/collections/tts/parts/__init__.py b/nemo/collections/tts/parts/__init__.py index 1350837aada6..032c0b897f90 100644 --- a/nemo/collections/tts/parts/__init__.py +++ b/nemo/collections/tts/parts/__init__.py @@ -1,4 +1,5 @@ from .datasets import AudioOnlyDataset +from .fastspeech import FastSpeechDataset from .helpers import ( tacotron2_eval_log_to_tb_func, tacotron2_log_to_tb_func, @@ -26,4 +27,5 @@ 'tacotron2_process_eval_batch', 'tacotron2_process_final_eval', 'tacotron2_eval_log_to_tb_func', + 'FastSpeechDataset', ] diff --git a/nemo/collections/tts/parts/fastspeech.py b/nemo/collections/tts/parts/fastspeech.py new file mode 100644 index 000000000000..1c557ebe0d9e --- /dev/null +++ b/nemo/collections/tts/parts/fastspeech.py @@ -0,0 +1,157 @@ +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import collections +import os + +import numpy as np +import torch +from torch import nn + + +class FastSpeechDataset: + def __init__(self, audio_dataset, durs_dir): + self._audio_dataset = audio_dataset + self._durs_dir = durs_dir + + def __getitem__(self, index): + audio, audio_len, text, text_len = self._audio_dataset[index] + dur_true = torch.tensor(np.load(os.path.join(self._durs_dir, f'{index}.npy'))).long() + return dict(audio=audio, audio_len=audio_len, text=text, text_len=text_len, dur_true=dur_true) + + def __len__(self): + return len(self._audio_dataset) + + +class LengthRegulator(nn.Module): + """Length Regulator.""" + + def __init__(self, encoder_output_size, duration_predictor_filter_size, duration_predictor_kernel_size, dropout): + super(LengthRegulator, self).__init__() + + self.duration_predictor = DurationPredictor( + input_size=encoder_output_size, + filter_size=duration_predictor_filter_size, + kernel=duration_predictor_kernel_size, + conv_output_size=duration_predictor_filter_size, + dropout=dropout, + ) + + def forward(self, encoder_output, encoder_output_mask, target=None, alpha=1.0, mel_max_length=None): + duration_predictor_output = self.duration_predictor(encoder_output, encoder_output_mask) + + if self.training: + output, dec_pos = self.get_output(encoder_output, target, alpha, mel_max_length) + else: + duration_predictor_output = torch.clamp_min(torch.exp(duration_predictor_output) - 1, 0) + + output, dec_pos = self.get_output(encoder_output, duration_predictor_output, alpha) + + return output, dec_pos, duration_predictor_output + + @staticmethod + def get_output(encoder_output, duration_predictor_output, alpha, mel_max_length=None): + output = list() + dec_pos = list() + + for i in range(encoder_output.size(0)): + repeats = duration_predictor_output[i].float() * alpha + repeats = torch.round(repeats).long() + output.append(torch.repeat_interleave(encoder_output[i], repeats, dim=0)) + dec_pos.append(torch.from_numpy(np.indices((output[i].shape[0],))[0] + 1)) + + output = torch.nn.utils.rnn.pad_sequence(output, batch_first=True) + dec_pos = torch.nn.utils.rnn.pad_sequence(dec_pos, batch_first=True) + + dec_pos = dec_pos.to(output.device, non_blocking=True) + + if mel_max_length: + output = output[:, :mel_max_length] + dec_pos = dec_pos[:, :mel_max_length] + + return output, dec_pos + + +class ConvTranspose(nn.Module): + """Convolution Module with transposes of last two dimensions.""" + + def __init__( + self, in_channels, out_channels, kernel_size=1, stride=1, padding=0, dilation=1, bias=True, w_init='relu' + ): + super(ConvTranspose, self).__init__() + + self.conv = nn.Conv1d( + in_channels, + out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + bias=bias, + ) + + nn.init.xavier_uniform_(self.conv.weight, gain=nn.init.calculate_gain(w_init)) + + def forward(self, x): + x = x.contiguous().transpose(1, 2) + x = self.conv(x) + x = x.contiguous().transpose(1, 2) + + return x + + +class DurationPredictor(nn.Module): + """Duration Predictor.""" + + def __init__(self, input_size, filter_size, kernel, conv_output_size, dropout): + super(DurationPredictor, self).__init__() + + self.input_size = input_size + self.filter_size = filter_size + self.kernel = kernel + self.conv_output_size = conv_output_size + self.dropout = dropout + + self.conv_layer = nn.Sequential( + collections.OrderedDict( + [ + ( + "conv1d_1", + 
ConvTranspose(self.input_size, self.filter_size, kernel_size=self.kernel, padding=1), + ), + ("relu_1", nn.ReLU()), + ("layer_norm_1", nn.LayerNorm(self.filter_size)), + ("dropout_1", nn.Dropout(self.dropout)), + ( + "conv1d_2", + ConvTranspose(self.filter_size, self.filter_size, kernel_size=self.kernel, padding=1), + ), + ("relu_2", nn.ReLU()), + ("layer_norm_2", nn.LayerNorm(self.filter_size)), + ("dropout_2", nn.Dropout(self.dropout)), + ] + ) + ) + + self.linear_layer = nn.Linear(self.conv_output_size, 1, bias=True) + + def forward(self, encoder_output, encoder_output_mask): + encoder_output = encoder_output * encoder_output_mask + + out = self.conv_layer(encoder_output) + out = self.linear_layer(out) + out = out * encoder_output_mask + out = out.squeeze(-1) + + return out diff --git a/nemo/collections/tts/parts/fastspeech_transformer.py b/nemo/collections/tts/parts/fastspeech_transformer.py new file mode 100644 index 000000000000..dc26a68153e1 --- /dev/null +++ b/nemo/collections/tts/parts/fastspeech_transformer.py @@ -0,0 +1,197 @@ +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import torch +from torch import nn + +from nemo.collections.nlp.nm.trainables.common.transformer import transformer_modules + + +def get_non_pad_mask(seq, pad_id): + assert seq.dim() == 2 + return seq.ne(pad_id).type(torch.float).unsqueeze(-1) + + +def get_sinusoid_encoding_table(n_position, d_hid, padding_idx=None): + """Sinusoid position encoding table.""" + + def cal_angle(position, hid_idx): + return position / np.power(10000, 2 * (hid_idx // 2) / d_hid) + + def get_posi_angle_vec(position): + return [cal_angle(position, hid_j) for hid_j in range(d_hid)] + + sinusoid_table = np.array([get_posi_angle_vec(pos_i) for pos_i in range(n_position)]) + + sinusoid_table[:, 0::2] = np.sin(sinusoid_table[:, 0::2]) # dim 2i + sinusoid_table[:, 1::2] = np.cos(sinusoid_table[:, 1::2]) # dim 2i+1 + + if padding_idx is not None: + # zero vector for padding dimension + sinusoid_table[padding_idx] = 0.0 + + return torch.tensor(sinusoid_table).float() + + +def get_attn_key_pad_mask(seq_k, seq_q, pad_id): + """For masking out the padding part of key sequence.""" + + # Expand to fit the shape of key query attention matrix. 
+ len_q = seq_q.size(1) + padding_mask = seq_k.eq(pad_id) + padding_mask = padding_mask.unsqueeze(1).expand(-1, len_q, -1) # b x lq x lk + + return padding_mask + + +class FFTBlock(torch.nn.Module): + """FFT Block""" + + def __init__(self, d_model, d_inner, n_head, d_k, d_v, fft_conv1d_kernel, fft_conv1d_padding, dropout=0.1): + super(FFTBlock, self).__init__() + self.slf_attn = transformer_modules.MultiHeadAttention(d_model, n_head, attn_layer_dropout=dropout) + self.n_head = n_head + self.pos_ffn = transformer_modules.PositionWiseFF( + hidden_size=d_model, inner_size=d_inner, ffn_dropout=dropout, + ) + + def forward(self, enc_input, non_pad_mask=None, slf_attn_mask=None): + slf_attn_mask = slf_attn_mask.unsqueeze(1).repeat(1, self.n_head, 1, 1) + enc_output = self.slf_attn(enc_input, enc_input, enc_input, attention_mask=slf_attn_mask) + enc_output *= non_pad_mask + + enc_output = self.pos_ffn(enc_output) + enc_output *= non_pad_mask + + return enc_output + + +class FastSpeechTransformerEncoder(nn.Module): + """Encoder.""" + + def __init__( + self, + len_max_seq, + d_word_vec, + n_layers, + n_head, + d_k, + d_v, + d_model, + d_inner, + fft_conv1d_kernel, + fft_conv1d_padding, + dropout, + n_src_vocab, + pad_id, + ): + + super(FastSpeechTransformerEncoder, self).__init__() + + n_position = len_max_seq + 1 + + self.src_word_emb = nn.Embedding(n_src_vocab, d_word_vec, padding_idx=pad_id) + self.pad_id = pad_id + + self.position_enc = nn.Embedding.from_pretrained( + get_sinusoid_encoding_table(n_position, d_word_vec, padding_idx=0), freeze=True + ) + + self.layer_stack = nn.ModuleList( + [ + FFTBlock( + d_model, + d_inner, + n_head, + d_k, + d_v, + fft_conv1d_kernel=fft_conv1d_kernel, + fft_conv1d_padding=fft_conv1d_padding, + dropout=dropout, + ) + for _ in range(n_layers) + ] + ) + + def forward(self, src_seq, src_pos): + # -- Prepare masks + slf_attn_mask = get_attn_key_pad_mask(seq_k=src_seq, seq_q=src_seq, pad_id=self.pad_id) + non_pad_mask = get_non_pad_mask(src_seq, pad_id=self.pad_id) + + # -- Forward + enc_output = self.src_word_emb(src_seq) + self.position_enc(src_pos) + + for i, enc_layer in enumerate(self.layer_stack): + enc_output = enc_layer(enc_output, non_pad_mask=non_pad_mask, slf_attn_mask=slf_attn_mask) + + return enc_output, non_pad_mask + + +class FastSpeechTransformerDecoder(nn.Module): + """Decoder.""" + + def __init__( + self, + len_max_seq, + d_word_vec, + n_layers, + n_head, + d_k, + d_v, + d_model, + d_inner, + fft_conv1d_kernel, + fft_conv1d_padding, + dropout, + pad_id, + ): + + super(FastSpeechTransformerDecoder, self).__init__() + + n_position = len_max_seq + 1 + + self.position_dec = nn.Embedding.from_pretrained( + get_sinusoid_encoding_table(n_position, d_word_vec, padding_idx=0), freeze=True + ) + self.pad_id = pad_id + + self.layer_stack = nn.ModuleList( + [ + FFTBlock( + d_model, + d_inner, + n_head, + d_k, + d_v, + fft_conv1d_kernel=fft_conv1d_kernel, + fft_conv1d_padding=fft_conv1d_padding, + dropout=dropout, + ) + for _ in range(n_layers) + ] + ) + + def forward(self, dec_seq, dec_pos): + # -- Prepare masks + slf_attn_mask = get_attn_key_pad_mask(seq_k=dec_pos, seq_q=dec_pos, pad_id=self.pad_id) + non_pad_mask = get_non_pad_mask(dec_pos, pad_id=self.pad_id) + + # -- Forward + dec_output = dec_seq + self.position_dec(dec_pos) + + for dec_layer in self.layer_stack: + dec_output = dec_layer(dec_output, non_pad_mask=non_pad_mask, slf_attn_mask=slf_attn_mask) + + return dec_output, non_pad_mask diff --git a/nemo/collections/tts/parts/helpers.py 
b/nemo/collections/tts/parts/helpers.py index 3212b086bb6e..0819b6398b45 100644 --- a/nemo/collections/tts/parts/helpers.py +++ b/nemo/collections/tts/parts/helpers.py @@ -4,7 +4,7 @@ import numpy as np import torch -import nemo +from nemo.utils import logging __all__ = [ "waveglow_log_to_tb_func", @@ -139,7 +139,7 @@ def tacotron2_process_eval_batch(tensors: dict, global_vars: dict): def tacotron2_process_final_eval(global_vars: dict, tag=None): eloss = torch.mean(torch.stack(global_vars['EvalLoss'])).item() global_vars['EvalLoss'] = eloss - nemo.logging.info(f"==========>>>>>>Evaluation Loss {tag}: {eloss}") + logging.info(f"==========>>>>>>Evaluation Loss {tag}: {eloss}") return global_vars diff --git a/nemo/collections/tts/parts/layers.py b/nemo/collections/tts/parts/layers.py index e78ef415b06e..9be02861c4cd 100644 --- a/nemo/collections/tts/parts/layers.py +++ b/nemo/collections/tts/parts/layers.py @@ -7,9 +7,7 @@ def __init__(self, in_dim, out_dim, bias=True, w_init_gain='linear'): super(LinearNorm, self).__init__() self.linear_layer = torch.nn.Linear(in_dim, out_dim, bias=bias) - torch.nn.init.xavier_uniform_( - self.linear_layer.weight, gain=torch.nn.init.calculate_gain(w_init_gain), - ) + torch.nn.init.xavier_uniform_(self.linear_layer.weight, gain=torch.nn.init.calculate_gain(w_init_gain)) def forward(self, x): return self.linear_layer(x) diff --git a/nemo/collections/tts/parts/tacotron2.py b/nemo/collections/tts/parts/tacotron2.py index 13e845231ec1..925251f19f44 100644 --- a/nemo/collections/tts/parts/tacotron2.py +++ b/nemo/collections/tts/parts/tacotron2.py @@ -1,12 +1,12 @@ # Copyright (c) 2019 NVIDIA Corporation -from math import sqrt import torch from torch import nn from torch.autograd import Variable from torch.nn import functional as F -from .layers import ConvNorm, LinearNorm, get_mask_from_lengths +from nemo.collections.tts.parts.layers import ConvNorm, LinearNorm, get_mask_from_lengths +from nemo.utils import logging class LocationLayer(nn.Module): @@ -55,8 +55,7 @@ def get_alignment_energies(self, query, processed_memory, attention_weights_cat) ------ query: decoder output (batch, n_mel_channels * n_frames_per_step) processed_memory: processed encoder outputs (B, T_in, attention_dim) - attention_weights_cat: cumulative and prev. att weights - (B, 2, max_time) + attention_weights_cat: cumulative and prev. 
att weights (B, 2, max_time) RETURNS ------- alignment (batch, max_time) @@ -107,12 +106,12 @@ def forward(self, x, inference=False): for linear in self.layers: x = F.relu(linear(x)) x0 = x[0].unsqueeze(0) - mask = Variable(torch.bernoulli(x0.data.new(x0.data.size()).fill_(0.5))) + mask = Variable(torch.bernoulli(x0.data.new(x0.data.size()).fill_(1 - self.p_dropout))) mask = mask.expand(x.size(0), x.size(1)) - x = x * mask * 2 + x = x * mask * 1 / (1 - self.p_dropout) else: for linear in self.layers: - x = F.dropout(F.relu(linear(x)), p=0.0, training=True) + x = F.dropout(F.relu(linear(x)), p=self.p_dropout, training=True) return x @@ -176,7 +175,7 @@ def __init__( def forward(self, x): for i in range(len(self.convolutions) - 1): - x = F.dropout(torch.tanh(self.convolutions[i](x)), 0.5, self.training) + x = F.dropout(torch.tanh(self.convolutions[i](x)), self.p_dropout, self.training) x = F.dropout(self.convolutions[-1](x), self.p_dropout, self.training) return x @@ -264,7 +263,7 @@ def __init__( self.p_decoder_dropout = p_decoder_dropout self.early_stopping = early_stopping - self.prenet = Prenet(n_mel_channels * n_frames_per_step, [prenet_dim, prenet_dim], prenet_p_dropout,) + self.prenet = Prenet(n_mel_channels * n_frames_per_step, [prenet_dim, prenet_dim], prenet_p_dropout) self.attention_rnn = nn.LSTMCell(prenet_dim + encoder_embedding_dim, attention_rnn_dim) @@ -487,7 +486,7 @@ def infer(self, memory, memory_lengths): alignments += [alignment] if len(mel_outputs) == self.max_decoder_steps: - logging.warning("Reached max decoder steps") + logging.warning("Reached max decoder steps %d.", self.max_decoder_steps) break decoder_input = mel_output diff --git a/nemo/collections/tts/parts/waveglow.py b/nemo/collections/tts/parts/waveglow.py index 8fc011dd296e..abf81f165e60 100644 --- a/nemo/collections/tts/parts/waveglow.py +++ b/nemo/collections/tts/parts/waveglow.py @@ -1,4 +1,6 @@ # Copyright (c) 2019 NVIDIA Corporation +from typing import Tuple + import torch import torch.nn.functional as F from torch.autograd import Variable @@ -34,7 +36,7 @@ def __init__(self, c): W = W.view(c, c, 1) self.conv.weight.data = W - def forward(self, z, reverse=False): + def forward(self, z, reverse: bool = False): # shape batch_size, group_size, n_of_groups = z.size() @@ -45,7 +47,7 @@ def forward(self, z, reverse=False): # Reverse computation W_inverse = W.float().inverse() W_inverse = Variable(W_inverse[..., None]) - if z.type() == 'torch.cuda.HalfTensor' or z.type() == 'torch.HalfTensor': + if z.dtype == torch.half: W_inverse = W_inverse.half() self.W_inverse = W_inverse z = F.conv1d(z, self.W_inverse, bias=None, stride=1, padding=0) @@ -54,7 +56,10 @@ def forward(self, z, reverse=False): # Forward computation log_det_W = batch_size * n_of_groups * torch.logdet(W.float()) z = self.conv(z) - return z, log_det_W + return ( + z, + log_det_W, + ) class WN(torch.nn.Module): @@ -105,8 +110,8 @@ def __init__(self, n_in_channels, n_mel_channels, n_layers, n_channels, kernel_s res_skip_layer = torch.nn.utils.weight_norm(res_skip_layer, name='weight') self.res_skip_layers.append(res_skip_layer) - def forward(self, forward_input): - audio, spect = forward_input + def forward(self, forward_input: Tuple[torch.Tensor, torch.Tensor]): + audio, spect = forward_input[0], forward_input[1] audio = self.start(audio) for i in range(self.n_layers): @@ -156,12 +161,12 @@ def __init__( self.WN.append(WN(n_half, n_mel_channels * n_group, **WN_config)) self.n_remaining_channels = n_remaining_channels - def forward(self, 
forward_input): + def forward(self, forward_input: Tuple[torch.Tensor, torch.Tensor]): """ forward_input[0] = mel_spectrogram: batch x n_mel_channels x frames forward_input[1] = audio: batch x time """ - spect, audio = forward_input + spect, audio = forward_input[0], forward_input[1] # Upsample spectrogram to size of audio spect = self.upsample(spect) @@ -201,7 +206,7 @@ def forward(self, forward_input): output_audio.append(audio) return torch.cat(output_audio, 1), log_s_list, log_det_W_list - def infer(self, spect, sigma=1.0): + def infer(self, spect, sigma: float = 1.0): spect = self.upsample(spect) # trim conv artifacts. maybe pad spec to kernel multiple time_cutoff = self.upsample.kernel_size[0] - self.upsample.stride[0] @@ -211,11 +216,12 @@ def infer(self, spect, sigma=1.0): spect = spect.contiguous().view(spect.size(0), spect.size(1), -1) spect = spect.permute(0, 2, 1) - audio = torch.randn(spect.size(0), self.n_remaining_channels, spect.size(2), device=spect.device,).to( + audio = sigma * torch.randn(spect.size(0), self.n_remaining_channels, spect.size(2), device=spect.device).to( spect.dtype ) - - audio = torch.autograd.Variable(sigma * audio) + # audio=sigma * torch.ones(spect.size(0), self.n_remaining_channels, spect.size(2), device=spect.device).to( + # spect.dtype + # ) for k in reversed(range(self.n_flows)): n_half = int(audio.size(1) / 2) @@ -226,26 +232,26 @@ def infer(self, spect, sigma=1.0): s = output[:, n_half:, :] b = output[:, :n_half, :] audio_1 = (audio_1 - b) / torch.exp(s) - audio = torch.cat([audio_0, audio_1], 1) + audio = torch.cat((audio_0, audio_1), 1) audio = self.convinv[k](audio, reverse=True) - if k % self.n_early_every == 0 and k > 0: - z = torch.randn(spect.size(0), self.n_early_size, spect.size(2), device=spect.device,).to(spect.dtype) - audio = torch.cat((sigma * z, audio), 1) - - audio = audio.permute(0, 2, 1).contiguous().view(audio.size(0), -1).data - return audio - - @staticmethod - def remove_weightnorm(model): - waveglow = model - for WN in waveglow.WN: - WN.start = torch.nn.utils.remove_weight_norm(WN.start) - WN.in_layers = remove(WN.in_layers) - WN.cond_layers = remove(WN.cond_layers) - WN.res_skip_layers = remove(WN.res_skip_layers) - return waveglow + z = sigma * torch.randn(spect.size(0), self.n_early_size, spect.size(2), device=spect.device).to( + spect.dtype + ) + # z = sigma * torch.ones(spect.size(0), self.n_early_size, spect.size(2), device=spect.device).to(spect.dtype) + audio = torch.cat((z, audio), 1) + return audio.permute(0, 2, 1).contiguous().view(audio.size(0), -1) + + +def remove_weightnorm(model): + waveglow = model + for WN in waveglow.WN: + WN.start = torch.nn.utils.remove_weight_norm(WN.start) + WN.in_layers = remove(WN.in_layers) + WN.cond_layers = remove(WN.cond_layers) + WN.res_skip_layers = remove(WN.res_skip_layers) + return waveglow def remove(conv_list): diff --git a/nemo/collections/tts/tacotron2_modules.py b/nemo/collections/tts/tacotron2_modules.py index 083ac4697526..5485728cd015 100644 --- a/nemo/collections/tts/tacotron2_modules.py +++ b/nemo/collections/tts/tacotron2_modules.py @@ -5,10 +5,12 @@ from torch import nn from torch.nn.functional import pad -from .parts.layers import get_mask_from_lengths -from .parts.tacotron2 import Decoder, Encoder, Postnet +from nemo import logging from nemo.backends.pytorch.nm import LossNM, NonTrainableNM, TrainableNM +from nemo.collections.tts.parts.layers import get_mask_from_lengths +from nemo.collections.tts.parts.tacotron2 import Decoder, Encoder, Postnet from 
nemo.core.neural_types import * +from nemo.utils.decorators import add_port_docs __all__ = [ "MakeGate", @@ -33,6 +35,7 @@ class TextEmbedding(TrainableNM): """ @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. """ @@ -40,6 +43,7 @@ def input_ports(self): return {"char_phone": NeuralType(('B', 'T'), LabelsType())} @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. """ @@ -75,6 +79,7 @@ class Tacotron2Encoder(TrainableNM): """ @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. """ @@ -88,6 +93,7 @@ def input_ports(self): } @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. """ @@ -153,6 +159,7 @@ class Tacotron2Decoder(TrainableNM): """ @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. """ @@ -170,6 +177,7 @@ def input_ports(self): } @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. """ @@ -200,6 +208,7 @@ def __init__( attention_location_n_filters: int = 32, attention_location_kernel_size: int = 31, prenet_p_dropout: float = 0.5, + force: bool = False, ): super().__init__() self.decoder = Decoder( @@ -219,10 +228,11 @@ def __init__( prenet_p_dropout=prenet_p_dropout, early_stopping=True, ) + self.force = force self.to(self._device) def forward(self, char_phone_encoded, encoded_length, mel_target): - if self.training: + if self.training or self.force: mel_output, gate_output, alignments = self.decoder( char_phone_encoded, mel_target, memory_lengths=encoded_length ) @@ -269,7 +279,44 @@ class Tacotron2DecoderInfer(Tacotron2Decoder): Defaults to 31. """ + def __init__( + self, + n_mel_channels: int, + n_frames_per_step: int = 1, + encoder_embedding_dim: int = 512, + gate_threshold: float = 0.5, + prenet_dim: int = 256, + max_decoder_steps: int = 1000, + decoder_rnn_dim: int = 1024, + p_decoder_dropout: float = 0.1, + p_attention_dropout: float = 0.1, + attention_rnn_dim: int = 1024, + attention_dim: int = 128, + attention_location_n_filters: int = 32, + attention_location_kernel_size: int = 31, + prenet_p_dropout: float = 0.5, + force: bool = False, + ): + super().__init__( + n_mel_channels=n_mel_channels, + n_frames_per_step=n_frames_per_step, + encoder_embedding_dim=encoder_embedding_dim, + gate_threshold=gate_threshold, + prenet_dim=prenet_dim, + max_decoder_steps=max_decoder_steps, + decoder_rnn_dim=decoder_rnn_dim, + p_decoder_dropout=p_decoder_dropout, + p_attention_dropout=p_attention_dropout, + attention_rnn_dim=attention_rnn_dim, + attention_dim=attention_dim, + attention_location_n_filters=attention_location_n_filters, + attention_location_kernel_size=attention_location_kernel_size, + prenet_p_dropout=prenet_p_dropout, + force=force, + ) + @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. """ @@ -283,6 +330,7 @@ def input_ports(self): } @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. 
""" @@ -304,7 +352,7 @@ def __str__(self): def forward(self, char_phone_encoded, encoded_length): if self.training: - raise ValueError("You are using the Tacotron 2 Infer Neural Module" " in training mode.") + raise ValueError("You are using the Tacotron 2 Infer Neural Module in training mode.") with torch.no_grad(): mel_output, gate_output, alignments, mel_len = self.decoder.infer( char_phone_encoded, memory_lengths=encoded_length @@ -329,6 +377,7 @@ class Tacotron2Postnet(TrainableNM): """ @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. """ @@ -340,6 +389,7 @@ def input_ports(self): } @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. """ @@ -388,6 +438,7 @@ class Tacotron2Loss(LossNM): """ @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. """ @@ -415,6 +466,7 @@ def input_ports(self): } @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. """ @@ -467,7 +519,11 @@ class MakeGate(NonTrainableNM): """MakeGate is a helper Neural Module that makes the target stop value. """ + def __init__(self): + super().__init__() + @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. """ @@ -481,6 +537,7 @@ def input_ports(self): } @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. """ diff --git a/nemo/collections/tts/waveglow_modules.py b/nemo/collections/tts/waveglow_modules.py index 1acffdb59d73..b316b720d484 100644 --- a/nemo/collections/tts/waveglow_modules.py +++ b/nemo/collections/tts/waveglow_modules.py @@ -5,8 +5,9 @@ from nemo import logging from nemo.backends.pytorch.nm import LossNM, TrainableNM -from nemo.collections.tts.parts.waveglow import WaveGlow +from nemo.collections.tts.parts.waveglow import WaveGlow, remove_weightnorm from nemo.core.neural_types import * +from nemo.utils.decorators import add_port_docs __all__ = ["WaveGlowNM", "WaveGlowInferNM", "WaveGlowLoss"] @@ -39,6 +40,7 @@ class WaveGlowNM(TrainableNM): """ @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. """ @@ -48,10 +50,11 @@ def input_ports(self): # ), # "audio": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), "mel_spectrogram": NeuralType(('B', 'D', 'T'), MelSpectrogramType()), - "audio": NeuralType(('B', 'T'), AudioSignal()), + "audio": NeuralType(('B', 'T'), AudioSignal(self.sample_rate)), } @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. """ @@ -60,13 +63,14 @@ def output_ports(self): # "audio": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # "log_s_list": NeuralType(), # "log_det_W_list": NeuralType(), - "audio": NeuralType(('B', 'T'), AudioSignal()), + "audio": NeuralType(('B', 'T'), AudioSignal(self.sample_rate)), "log_s_list": NeuralType(elements_type=ChannelType()), "log_det_W_list": NeuralType(elements_type=ChannelType()), } def __init__( self, + sample_rate: int, n_mel_channels: int = 80, n_flows: int = 12, n_group: int = 8, @@ -76,6 +80,7 @@ def __init__( n_wn_channels: int = 512, wn_kernel_size: int = 3, ): + self.sample_rate = sample_rate super().__init__() wavenet_config = { "n_layers": n_wn_layers, @@ -136,6 +141,7 @@ class WaveGlowInferNM(WaveGlowNM): """ @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. 
""" @@ -147,17 +153,20 @@ def input_ports(self): } @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. """ # return {"audio": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)})} - return {"audio": NeuralType(('B', 'T'), AudioSignal())} + return {"audio": NeuralType(('B', 'T'), AudioSignal(freq=self.sample_rate))} def __str__(self): return "WaveGlowNM" def __init__( self, + *, + sample_rate: int, n_mel_channels: int = 80, n_flows: int = 12, n_group: int = 8, @@ -169,7 +178,9 @@ def __init__( sigma: float = 0.6, ): self._sigma = sigma + # self.sample_rate = sample_rate # Done in parent class super().__init__( + sample_rate=sample_rate, n_mel_channels=n_mel_channels, n_flows=n_flows, n_group=n_group, @@ -199,10 +210,10 @@ def denoise(self, audio, strength=0.1): def forward(self, mel_spectrogram): if not self._removed_weight_norm: logging.info("remove WN") - self.waveglow = self.waveglow.remove_weightnorm(self.waveglow) + self.waveglow = remove_weightnorm(self.waveglow) self._removed_weight_norm = True if self.training: - raise ValueError("You are using the WaveGlow Infer Neural Module " "in training mode.") + raise ValueError("You are using the WaveGlow Infer Neural Module in training mode.") with torch.no_grad(): audio = self.waveglow.infer(mel_spectrogram, sigma=self._sigma) return audio @@ -225,6 +236,7 @@ class WaveGlowLoss(LossNM): """ @property + @add_port_docs() def input_ports(self): """Returns definitions of module input ports. """ @@ -233,20 +245,22 @@ def input_ports(self): # "z": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)}), # "log_s_list": NeuralType(), # "log_det_W_list": NeuralType(), - "z": NeuralType(('B', 'T'), AudioSignal()), + "z": NeuralType(('B', 'T'), AudioSignal(freq=self.sample_rate)), "log_s_list": NeuralType(elements_type=ChannelType()), "log_det_W_list": NeuralType(elements_type=ChannelType()), } @property + @add_port_docs() def output_ports(self): """Returns definitions of module output ports. """ return {"loss": NeuralType(elements_type=LossType())} - def __init__(self, sigma: float = 1.0): + def __init__(self, sample_rate: int, sigma: float = 1.0): super().__init__() self.sigma = sigma + self.sample_rate = sample_rate def _loss_function(self, **kwargs): return self._loss(*(kwargs.values())) diff --git a/nemo/constants.py b/nemo/constants.py index 6cd3a1f60ff8..9d6793d7630a 100644 --- a/nemo/constants.py +++ b/nemo/constants.py @@ -47,4 +47,5 @@ # NEMO_ENV_VARNAME_DEBUG_VERBOSITY = "NEMO_DEBUG_VERBOSITY" NEMO_ENV_VARNAME_ENABLE_COLORING = "NEMO_ENABLE_COLORING" NEMO_ENV_VARNAME_REDIRECT_LOGS_TO_STDERR = "NEMO_REDIRECT_LOGS_TO_STDERR" +NEMO_ENV_VARNAME_TESTING = "NEMO_TESTING" # NEMO_ENV_VARNAME_SAVE_LOGS_TO_DIR = "NEMO_SAVE_LOGS_TO_DIR" diff --git a/nemo/core/__init__.py b/nemo/core/__init__.py index e48567b139a6..11598beb000f 100644 --- a/nemo/core/__init__.py +++ b/nemo/core/__init__.py @@ -15,7 +15,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from .callbacks import * -from .neural_factory import * -from .neural_modules import * -from .neural_types import * +from nemo.core.callbacks import * +from nemo.core.nemo_model import NeMoModel +from nemo.core.neural_factory import * +from nemo.core.neural_graph import * +from nemo.core.neural_modules import * +from nemo.core.neural_types import * diff --git a/nemo/core/actions.py b/nemo/core/actions.py new file mode 100755 index 000000000000..ad0757e04b39 --- /dev/null +++ b/nemo/core/actions.py @@ -0,0 +1,298 @@ +# ! /usr/bin/python +# -*- coding: utf-8 -*- + +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from abc import ABC, abstractmethod +from typing import List, Optional, Union + +from nemo.core.neural_factory import Optimization +from nemo.core.neural_modules import ModuleType +from nemo.core.neural_types import NmTensor +from nemo.utils.app_state import AppState + + +def topological_sort_from_leaves(leaf_nmtensors: List[NmTensor], cached_training_state: 'TrainingState' = None): + """A function that accepts a list of NmTensors that need to be computed and constructs a callchain DAG that starts + from a datalayerNM and can be used to compute the NmTensors. + + args: + leaf_nmtensors (List[NmTensors]): The tensors to be computed + cached_training_state (TrainingState): A dictionary of already computed tensors. + Defaults to None meaning an empty cache. 
+ + returns: + top_sorted_modules: the callchain DAG + """ + + def create_node(producer, producer_args): + if producer_args is None: + return tuple((producer, ())) + return tuple((producer, tuple([(k, v) for k, v in producer_args.items()]),)) + + def is_in_degree_zero(node, processed_nodes, cached_training_state): + """A node has in degree of zero""" + if node[1] == (): + return True + for _, nmtensor in node[1]: + node = create_node(nmtensor.producer, nmtensor.producer_args) + if node not in processed_nodes: + if cached_training_state and cached_training_state.check_tensor_cached(nmtensor.unique_name): + continue + return False + return True + + hooks = leaf_nmtensors if isinstance(leaf_nmtensors, list) else [leaf_nmtensors] + + # ensures that no tensors are processed twice + processed_nmtensors = set() + + indices_to_remove = [] + # Check for duplicates in hook + for i, nmtensor in enumerate(hooks): + if nmtensor in processed_nmtensors: + indices_to_remove.append(i) + else: + processed_nmtensors.add(nmtensor) + + for i in reversed(indices_to_remove): + hooks.pop(i) + + _top_sorted_modules = [] + all_nodes = {} + + # extract all nodes to all_nodes set + hooks_lst = list(hooks) + while len(hooks_lst) > 0: + # take nmtensor from the end of the list + nmtensor = hooks_lst.pop() + producer_args = nmtensor.producer_args + + node = create_node(nmtensor.producer, producer_args) + # Store nmtensor as an output of its producer + # first make sure all keys are present per output port + # and nm is inside all_nodes + if node not in all_nodes: + all_nodes[node] = {k: None for k in nmtensor.producer.output_ports} + # second, populate output port with current nmtensor + # where applicable + all_nodes[node][nmtensor.name] = nmtensor + processed_nmtensors.add(nmtensor) + + new_tensors = set() + if producer_args is not None and producer_args != {}: + for _, new_nmtensor in producer_args.items(): + if new_nmtensor not in processed_nmtensors: + new_tensors.add(new_nmtensor) + + if cached_training_state: + for _, input_nmtensor in producer_args.items(): + if cached_training_state.check_tensor_cached(input_nmtensor.unique_name): + new_tensors.remove(input_nmtensor) + + for new_nmtensor in new_tensors: + # put in the start of list + hooks_lst.insert(0, new_nmtensor) + + all_node_with_output = [] + # Iterate over all_nodes to create new nodes that include its output + # now all nodes have (module, input tensors, output tensors) + for node in all_nodes: + all_node_with_output.append(tuple((node[0], node[1], all_nodes[node]))) + + processed_nodes = [] + while len(all_node_with_output) > 0: + for node in all_node_with_output.copy(): + # if node's in_degree is zero it can be added to + # _top_sorted_modules + # this will also reduce in_degree of its children + if is_in_degree_zero(node, processed_nodes, cached_training_state): + _top_sorted_modules.append(node) + processed_nodes.append((node[0], node[1])) + all_node_with_output.remove(node) + + # Create top_sorted_modules aka callchain + top_sorted_modules = [] + for i, mod in enumerate(_top_sorted_modules): + top_sorted_modules.append((mod[0], dict(mod[1]), mod[2])) + # Ensure that there is only one dataset in callchain + if i > 0 and mod[0].type == ModuleType.datalayer: + raise ValueError("There were more than one DataLayer NeuralModule inside your DAG.") + + if cached_training_state and mod[0].type == ModuleType.datalayer: + raise ValueError("Could not compute tensor from current cached training state.") + + return top_sorted_modules + + +class TrainingState: + 
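    # A rough usage sketch (illustrative only; assumes a tensor registered under the name "loss"):
    # inside any NeMoCallback method the per-step TrainingState is available as state["tensors"], so
    #     loss_value = state["tensors"].get_tensor("loss")
    # returns the computed loss for the current step, running the forward pass for it if needed.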
def __init__(self, action: 'Actions'): + """A class used to wrap the current training state of an Actions.train() function. This class holds a mapping + of tensor.unique_name -> its backend tensor (e.g. a PyTorch tensor) or None if the tensor has not yet been computed + on the current step. + + args: + action (Actions): The Actions object this state is associated with. + """ + tensor_naming_registry = AppState().tensor_names + self.tensor_dict = {}.fromkeys(tensor_naming_registry.unique_names, None) + self._action = action + + def tensor_list(self): + """Returns a list of the unique names of all tensors. + """ + return self.tensor_dict.keys() + + def clear_dict(self): + """Clears the dictionary by setting all values to None. Used in-between training batches to clear its state. + """ + for name in self.tensor_dict: + self.tensor_dict[name] = None + + def set_tensor(self, tensor: NmTensor, value: 'torch.Tensor'): + """Sets the value of a tensor + + args: + tensor (NmTensor) + value (torch.Tensor) + """ + self.tensor_dict[tensor.unique_name] = value + + def check_tensor_cached(self, unique_name: str): + """Checks whether the tensor value has been computed in the current step yet. + + args: + unique_name (str): The NmTensor.unique_name that we want to check for. + + returns: + (bool) whether the tensor with unique_name has been computed yet. + """ + if self.tensor_dict[unique_name] is None: + return False + return True + + def get_tensor(self, name: Union[str, NmTensor], compute: bool = True): + """Returns the value associated with a tensor and, optionally, computes the value of the tensor if not already + set. + + args: + name (str, NmTensor): The user-defined name for a tensor or the NmTensor itself. + compute (bool): If True and the tensor has not already been computed, there will be an attempt to create a + call DAG and then do a forward pass on this call DAG to compute the tensor. If False, it will return + None if the tensor has not been computed yet. + Defaults to True. + + returns: + (torch.Tensor or None) representing the computed value of the requested name. Returns None if compute is + False and the tensor has not been computed yet. + """ + if isinstance(name, NmTensor): + unique_name = name.unique_name + else: + unique_name = AppState().tensor_names[name] + tensor_value = self.tensor_dict[unique_name] + if tensor_value is None and compute: + nmtensor = AppState().tensor_names._nmtensor_uniname_dict[unique_name] + callchain = topological_sort_from_leaves([nmtensor], cached_training_state=self) + callchain.insert(0, ()) + self._action.nm_graph_forward_pass(callchain, self.tensor_dict) + tensor_value = self.tensor_dict[unique_name] + return tensor_value + + +class Actions(ABC): + """Basic actions allowed on graphs of Neural Modules""" + + def __init__(self, local_rank, global_rank, optimization_level=Optimization.mxprO0): + self._local_rank = local_rank + self._global_rank = global_rank + self._optim_level = optimization_level + + @property + def local_rank(self): + """Local rank during distributed execution. None if single GPU/CPU + + Returns: + (int) rank of the worker, or None if not running in distributed mode + """ + return self._local_rank + + @property + def global_rank(self): + """Global rank during distributed execution.
None if single GPU/CPU + + Returns: + (int) rank or worker or None if not in distributed model + """ + return self._global_rank + + @abstractmethod + def train( + self, + tensors_to_optimize: List[NmTensor], + callbacks: Optional[List[Union['ActionCallback', 'NeMoCallback']]], + lr_policy=None, + batches_per_step=None, + stop_on_nan_loss=False, + ): + """This action executes training and (optionally) evaluation. + + Args: + tensors_to_optimize: which tensors to optimize. Typically this is + single loss tesnor. + callbacks: list of callback objects + lr_policy: function which should take (initial_lr, step, epoch) and + return learning rate + batches_per_step: number of mini-batches to process before one + optimizer step. (default: None, same as 1). Use this + to simulate larger batch sizes on hardware which could not fit + larger batch in memory otherwise. Effectively, this will make + "algorithmic" batch size per GPU/worker = batches_per_step* + batch_size + stop_on_nan_loss: (default: False) If set to True, the training + will stop if loss=nan or inf. If set to False, the training + will continue. + + Returns: + None + """ + pass + + @abstractmethod + def infer(self, tensors: List[NmTensor]): + """This action executes inference. Nothing is optimized. + Args: + tensors: which tensors to evaluate. + + Returns: + None + """ + pass + + @abstractmethod + def create_optimizer(self, optimizer, things_to_optimize, optimizer_params): + """ + Creates an optimizer object to be use in the train() method. + + Args: + optimizer: Specifies which optimizer to use. + things_to_optimize: A list of neural modules or tensors to be + optimized. + optimizer_params: Specifies the parameters of the optimizer + + Returns: + Optimizer + """ + pass diff --git a/nemo/core/callbacks.py b/nemo/core/callbacks.py index 1ebf3675e270..d5e7bc1c3d6a 100644 --- a/nemo/core/callbacks.py +++ b/nemo/core/callbacks.py @@ -15,233 +15,396 @@ # See the License for the specific language governing permissions and # limitations under the License. +# __all__ = [ +# "NeMoCallback", +# "SimpleLogger", +# "TensorboardLogger", +# "WandBLogger", +# "CheckpointCallback", +# "on_train_start", +# "on_train_end", +# "on_epoch_start", +# "on_epoch_end", +# "on_batch_start", +# "on_batch_end", +# "on_step_start", +# "on_step_end", +# ] + import glob import os -import sys import time -import warnings -from abc import ABC, abstractmethod -from collections import namedtuple +from abc import ABC +from typing import Callable, List, Union + +from nemo.core.deprecated_callbacks import ( + ActionCallback, + EvaluatorCallback, + ModuleSaverCallback, + SimpleLossLoggerCallback, + UnfreezeCallback, + ValueSetterCallback, + WandbCallback, +) +from nemo.core.neural_types import NmTensor +from nemo.utils import get_checkpoint_from_dir, logging +from nemo.utils.app_state import AppState + +try: + import wandb + + _WANDB_AVAILABLE = True +except (ImportError, ModuleNotFoundError): + _WANDB_AVAILABLE = False + + +class NeMoCallback(ABC): + """The base class for callbacks inside of NeMo. It contains no __init__ which children classes are responsible for. + Each callback contains 8 functions which are called at different stages of train(). All functions must take as the + first argument: the current action state. This state is a StateWrapper object. + TODO: Add a link to documentation. 
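    A minimal sketch of a custom callback (illustrative only; the class name is hypothetical and assumes a tensor
    registered under the name "loss", as used by the loggers below):

        from nemo.core.callbacks import NeMoCallback
        from nemo.utils import logging

        class LossPrinter(NeMoCallback):
            def on_step_end(self, state):
                # state behaves like a dict; "tensors" holds the per-step TrainingState
                if state["step"] % 10 == 0:
                    logging.info("loss at step %d: %s", state["step"], state["tensors"].get_tensor("loss"))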
+ """ + + def on_train_start(self, state): + pass + + def on_epoch_start(self, state): + pass + + def on_batch_start(self, state): + pass + + def on_step_start(self, state): + pass + + def on_step_end(self, state): + pass + + def on_batch_end(self, state): + pass -import nemo -from ..utils import get_checkpoint_from_dir + def on_epoch_end(self, state): + pass + + def on_train_end(self, state): + pass -class ActionCallback(ABC): - """Abstract interface for callbacks. +def on_train_start(func): + """A function decorator that wraps a Callable inside the NeMoCallback object and runs the function with the + on_train_start callback event. """ - def __init__(self): - self._registered_tensors = {} - self._action = None + class NeMoCallbackWrapper(NeMoCallback): + def __init__(self, my_func): + self._func = my_func - @property - def step(self): - return self.action.step + def on_train_start(self, state): + self._func(state) - @property - def epoch_num(self): - return self.action.epoch_num + return NeMoCallbackWrapper(func) - @property - def registered_tensors(self): - return self._registered_tensors - @property - def local_rank(self): - return self.action.local_rank +def on_epoch_start(func): + """A function decorator that wraps a Callable inside the NeMoCallback object and runs the function with the + on_epoch_start callback event. + """ - @property - def global_rank(self): - return self.action.global_rank + class NeMoCallbackWrapper(NeMoCallback): + def __init__(self, my_func): + self._func = my_func - @property - def action(self): - return self._action + def on_epoch_start(self, state): + self._func(state) - @action.setter - def action(self, action_obj): - self._action = action_obj + return NeMoCallbackWrapper(func) - @property - def logger(self): - warnings.warn("This will be deprecated in future releases. Please use " "nemo.logging instead") - return nemo.logging - def on_action_start(self): - pass +def on_batch_start(func): + """A function decorator that wraps a Callable inside the NeMoCallback object and runs the function with the + on_batch_start callback event. + """ - def on_action_end(self): - pass + class NeMoCallbackWrapper(NeMoCallback): + def __init__(self, my_func): + self._func = my_func - def on_epoch_start(self): - pass + def on_batch_start(self, state): + self._func(state) - def on_epoch_end(self): - pass + return NeMoCallbackWrapper(func) - def on_iteration_start(self): - pass - def on_iteration_end(self): - pass +def on_step_start(func): + """A function decorator that wraps a Callable inside the NeMoCallback object and runs the function with the + on_step_start callback event. + """ + + class NeMoCallbackWrapper(NeMoCallback): + def __init__(self, my_func): + self._func = my_func + + def on_step_start(self, state): + self._func(state) + return NeMoCallbackWrapper(func) -class ModuleSaverCallback(ActionCallback): + +def on_step_end(func): + """A function decorator that wraps a Callable inside the NeMoCallback object and runs the function with the + on_step_end callback event. """ - For callback documentation: please see - https://nvidia.github.io/NeMo/tutorials/callbacks.html + + class NeMoCallbackWrapper(NeMoCallback): + def __init__(self, my_func): + self._func = my_func + + def on_step_end(self, state): + self._func(state) + + return NeMoCallbackWrapper(func) + + +def on_batch_end(func): + """A function decorator that wraps a Callable inside the NeMoCallback object and runs the function with the + on_batch_end callback event. 
""" - def __init__( - self, save_modules_list, step_freq=1000, folder=None, checkpoints_to_keep=4, - ): - super().__init__() - self._save_modules_list = save_modules_list - self._folder = folder - self._step_freq = step_freq - self._ckpt2keep = checkpoints_to_keep - self._saved_ckpts = [] + class NeMoCallbackWrapper(NeMoCallback): + def __init__(self, my_func): + self._func = my_func - def on_iteration_end(self): - step = self.step - if ( - self._step_freq > 0 - and step % self._step_freq == 0 - and step > 0 - and (self.global_rank is None or self.global_rank == 0) - ): - for m in self._save_modules_list: - class_name = m.__class__.__name__ - uid = m.unique_instance_id - fn = f"{class_name}_{uid}-STEP-{step}.pt" - if self._folder is None: - file_name = fn - else: - file_name = os.path.join(self._folder, fn) - nemo.logging.info(f"Saving module {class_name} in {file_name}") - m.save_to(file_name) - nemo.logging.info("Saved.") - self._saved_ckpts.append(f'-{self.step}.pt') - if len(self._saved_ckpts) > self._ckpt2keep: - for end in self._saved_ckpts[: -self._ckpt2keep]: - for file in glob.glob(f'{self._folder}/*{end}'): - os.remove(file) - self._saved_ckpts = self._saved_ckpts[-self._ckpt2keep :] - - def on_action_end(self): - step = self.step - if self.global_rank is None or self.global_rank == 0: - for m in self._save_modules_list: - class_name = m.__class__.__name__ - uid = m.unique_instance_id - fn = f"{class_name}_{uid}-STEP-{step}.pt" - if self._folder is None: - file_name = fn - else: - file_name = os.path.join(self._folder, fn) - nemo.logging.info(f"Saving module {class_name} in {file_name}") - m.save_to(file_name) - nemo.logging.info("Saved.") + def on_batch_end(self, state): + self._func(state) + return NeMoCallbackWrapper(func) -class SimpleLossLoggerCallback(ActionCallback): + +def on_epoch_end(func): + """A function decorator that wraps a Callable inside the NeMoCallback object and runs the function with the + on_epoch_end callback event. """ - For callback documentation: please see - https://nvidia.github.io/NeMo/tutorials/callbacks.html + + class NeMoCallbackWrapper(NeMoCallback): + def __init__(self, my_func): + self._func = my_func + + def on_epoch_end(self, state): + self._func(state) + + return NeMoCallbackWrapper(func) + + +def on_train_end(func): + """A function decorator that wraps a Callable inside the NeMoCallback object and runs the function with the + on_train_end callback event. """ + class NeMoCallbackWrapper(NeMoCallback): + def __init__(self, my_func): + self._func = my_func + + def on_train_end(self, state): + self._func(state) + + return NeMoCallbackWrapper(func) + + +class SimpleLogger(NeMoCallback): + def __init__(self, step_freq: int = 100, tensors_to_log: List[Union[str, NmTensor]] = ["loss"]): + """A simple callback that prints tensors to screen. It's default option is to print the training loss every + 100 steps. Additional tensors can be printed by adding them to the tensors_to_log argument. + + args: + step_freq (int): The frequency of printing to screen. Defaults to every 100 steps + tensors_to_log (List of str or NmTensor): A list of either tensor names or NmTensors which will be printed + every step_freq steps. + Defaults to ["loss"] which only prints the loss. 
+ """ + self.step_freq = step_freq + self.tensors_to_log = tensors_to_log + + def on_step_end(self, state): + if state["step"] % self.step_freq == 0: + for tensor_key in self.tensors_to_log: + tensor = state["tensors"].get_tensor(tensor_key) + logging.info("%s: %s", tensor_key, tensor) + + +class TensorboardLogger(NeMoCallback): def __init__( - self, tensors, print_func=None, get_tb_values=None, log_to_tb_func=None, step_freq=25, tb_writer=None, + self, + tb_writer: 'torch.utils.tensorboard.SummaryWriter', + step_freq: int = 100, + tensors_to_log: List[Union[str, NmTensor]] = ["loss"], + custom_tb_log_func: Callable[[Union[str, NmTensor]], None] = None, + log_epoch: bool = True, + log_lr: bool = True, ): + """A tensorboard callback that logs tensors using a tensorboard writer object. It's default option is to log + the loss every 100 steps. Additional scalar tensors can be logged by adding them to the tensors_to_log + argument. In order to log complex tensorboard entities, the custom_tb_log_func must be passed it. By default, + it always logs the current epoch and the time taken per epoch. + + args: + tb_writer (required): The tensorboard logger object. + step_freq (int): The frequency of tensorboard logging. Defaults to every 100 steps + tensors_to_log (List of str or NmTensor): A list of either tensor names or NmTensors which will be logged + every step_freq steps. + Defaults to ["loss"] which only prints the loss. + custom_tb_log_func (func): TensorboardLogger loops through tensors_to_log and passes these elements to + custom_tb_log_func. So a custom_tb_log_func will receive one argument on each call with the arugment + being an element from tensors_to_log. + Defaults to None which logs each tensors_to_log as a scalar. + log_epoch (bool): Whether to log epoch and epoch training time to tensorboard. + Defaults to True. + log_lr (bool): Whether to log the learning rate to tensorboard. + Defaults to True. 
+ """ + self.step_freq = step_freq + self.tensors_to_log = tensors_to_log + self.tb_writer = tb_writer + self.custom_tb_log_func = custom_tb_log_func + self._last_epoch_start = None + self._log_epoch = log_epoch + self._log_lr = log_lr + + def on_epoch_start(self, state): + if state["global_rank"] is None or state["global_rank"] == 0: + self._last_epoch_start = time.time() + + def on_epoch_end(self, state): + if state["global_rank"] is None or state["global_rank"] == 0: + if self._log_epoch: + epoch_time = time.time() - self._last_epoch_start + self.tb_writer.add_scalar('misc/epoch', state["epoch"], state["step"]) + self.tb_writer.add_scalar('misc/epoch_time', epoch_time, state["step"]) + + def on_step_end(self, state): + if state["global_rank"] is None or state["global_rank"] == 0: + if state["step"] % self.step_freq == 0: + tb_log_func = lambda x: self.tb_writer.add_scalar(x, state["tensors"].get_tensor(x), state["step"]) + if self.custom_tb_log_func is not None: + tb_log_func = self.custom_tb_log_func + for tensor_key in self.tensors_to_log: + tb_log_func(tensor_key) - super().__init__() - if not isinstance(tensors, list): - tensors = [tensors] - self._tensors = tensors - self._print_func = print_func - self._get_tb_values = get_tb_values - self._log_to_tb_func = log_to_tb_func + if self._log_lr: + self.tb_writer.add_scalar('param/lr', state["optimizers"][0].param_groups[0]['lr'], state["step"]) + + +class WandBLogger(NeMoCallback): + def __init__( + self, + step_freq: int = 100, + tensors_to_log: List[Union[str, NmTensor]] = ["loss"], + wandb_name: str = None, + wandb_project: str = None, + args=None, + log_epoch: bool = True, + log_lr: bool = True, + ): + """A [Weights & Biases](https://docs.wandb.com/) callback that logs tensors to W&B. It's default option is to + log the loss every 100 steps. Additional scalar tensors can be logged by adding them to the tensors_to_log + argument. By default, it always logs the current epoch and the time taken per epoch. + + args: + step_freq (int): The frequency of Weights and Biases logging. Defaults to every 100 steps + tensors_to_log (List of str or NmTensor): A list of either tensor names or NmTensors which will be logged + every step_freq steps. + Defaults to ["loss"] which only prints the loss. + wandb_name(str): wandb experiment name. + Defaults to None + wandb_project(str): wandb project name. + Defaults to None + args: argparse flags which will be logged as hyperparameters. + Defaults to None. + log_epoch (bool): Whether to log epoch and epoch training time to Weights and Biases. + Defaults to True. + log_lr (bool): Whether to log epoch and epoch training time to Weights and Biases. + Defaults to True. + """ + if not _WANDB_AVAILABLE: + logging.error("Could not import wandb. 
Did you install it (pip install --upgrade wandb)?") self._step_freq = step_freq - self._swriter = tb_writer - self._start_time = None + self._tensors_to_log = tensors_to_log + self._name = wandb_name + self._project = wandb_project + self._args = args self._last_epoch_start = None - self._last_iter_start = None - - @property - def tensors(self): - return self._tensors - - def on_action_start(self): - if self.global_rank is None or self.global_rank == 0: - nemo.logging.info("Starting .....") - self._start_time = time.time() - - def on_action_end(self): - if self.global_rank is None or self.global_rank == 0: - if self._swriter is not None: - self._swriter.close() - nemo.logging.info(f"Done in {time.time() - self._start_time}") - - def on_epoch_start(self): - if self.global_rank is None or self.global_rank == 0: - nemo.logging.info(f"Starting epoch {self.epoch_num}") + self._log_epoch = log_epoch + self._log_lr = log_lr + + def on_train_start(self, state): + if state["global_rank"] is None or state["global_rank"] == 0: + if _WANDB_AVAILABLE and wandb.run is None: + wandb.init(name=self._name, project=self._project) + if self._args is not None: + wandb.config.update(self._args) + elif _WANDB_AVAILABLE and wandb.run is not None: + logging.info("Re-using wandb session") + else: + logging.error("Could not import wandb. Did you install it (pip install --upgrade wandb)?") + logging.info("Will not log data to weights and biases.") + self._step_freq = -1 + + def on_step_end(self, state): + # log training metrics + if state["global_rank"] is None or state["global_rank"] == 0: + if state["step"] % self._step_freq == 0 and self._step_freq > 0: + tensors_logged = {t: state["tensors"].get_tensor(t).cpu() for t in self._tensors_to_log} + # Always log learning rate + if self._log_lr: + tensors_logged['LR'] = state["optimizers"][0].param_groups[0]['lr'] + self._wandb_log(tensors_logged, state["step"]) + + def on_epoch_start(self, state): + if state["global_rank"] is None or state["global_rank"] == 0: self._last_epoch_start = time.time() - def on_epoch_end(self): - if self.global_rank is None or self.global_rank == 0: - step = self.step - run_time = time.time() - self._last_epoch_start - nemo.logging.info(f"Finished epoch {self.epoch_num} in {run_time}") - if self._swriter is not None: - value = self.epoch_num - self._swriter.add_scalar('misc/epoch', value, step) - value = time.time() - self._last_epoch_start - self._swriter.add_scalar('misc/epoch_time', value, step) - - def on_iteration_start(self): - if self.global_rank is None or self.global_rank == 0: - self._last_iter_start = time.time() - - def on_iteration_end(self): - if self.global_rank is None or self.global_rank == 0: - step = self.step - if step % self._step_freq == 0: - tensor_values = [self.registered_tensors[t.unique_name] for t in self.tensors] - - nemo.logging.info(f"Step: {step}") - if self._print_func: - self._print_func(tensor_values) - sys.stdout.flush() - if self._swriter is not None: - if self._get_tb_values: - tb_objects = self._get_tb_values(tensor_values) - for name, value in tb_objects: - value = value.item() - self._swriter.add_scalar(name, value, step) - if self._log_to_tb_func: - self._log_to_tb_func(self._swriter, tensor_values, step) - run_time = time.time() - self._last_iter_start - self._swriter.add_scalar('misc/step_time', run_time, step) - run_time = time.time() - self._last_iter_start - nemo.logging.info(f"Step time: {run_time} seconds") - - -class CheckpointCallback(ActionCallback): - """ - For callback documentation: 
please see - https://nvidia.github.io/NeMo/tutorials/callbacks.html - """ + def on_epoch_end(self, state): + if state["global_rank"] is None or state["global_rank"] == 0: + if self._log_epoch: + epoch_time = time.time() - self._last_epoch_start + self._wandb_log({"epoch": state["epoch"], "epoch_time": epoch_time}, state["step"]) + @staticmethod + def _wandb_log(tensors_logged, step): + if _WANDB_AVAILABLE: + wandb.log(tensors_logged, step=step) + + +class CheckpointCallback(NeMoCallback): def __init__( - self, folder, load_from_folder=None, step_freq=-1, epoch_freq=-1, checkpoints_to_keep=4, force_load=False, + self, + folder: str, + load_from_folder: str = None, + step_freq: int = -1, + epoch_freq: int = -1, + checkpoints_to_keep: int = 4, + force_load: bool = False, ): - super().__init__() + """A callback that does checkpointing of module weights and trainer (incl. optimizer) status. + + args: + folder (str, required): A path where checkpoints are to be stored and loaded from if load_from_folder is + None. + load_from_folder (str): A path where checkpoints can be loaded from. + Defaults to None. + step_freq (int): How often in terms of steps to save checkpoints. One of step_freq or epoch_freq is + required. + epoch_freq (int): How often in terms of epochs to save checkpoints. One of step_freq or epoch_freq is + required. + checkpoints_to_keep (int): Number of most recent checkpoints to keep. Older checkpoints will be deleted. + Defaults to 4. + force_load (bool): Whether to crash if loading is unsuccessful. + Defaults to False + """ if step_freq == -1 and epoch_freq == -1: - nemo.logging.warning("No checkpoints will be saved because step_freq and " "epoch_freq are both -1.") + logging.warning("No checkpoints will be saved because step_freq and epoch_freq are both -1.") if step_freq > -1 and epoch_freq > -1: - nemo.logging.warning("You config the model to save by both steps and epochs. " "Save by step_freq only") + logging.warning("You config the model to save by both steps and epochs. Please use one or the other") epoch_freq = -1 self._step_freq = step_freq @@ -253,55 +416,53 @@ def __init__( # If True, run will fail if we cannot load module weights self._force_load = force_load - def __save_to(self, path): - if self.global_rank is not None and self.global_rank != 0: + def __save_to(self, path, state): + if state["global_rank"] is not None and state["global_rank"] != 0: return if not os.path.isdir(path): - nemo.logging.info(f"Creating {path} folder") + logging.info(f"Creating {path} folder") os.makedirs(path, exist_ok=True) unique_mod_names = set() - for module in self.action.modules: + for module in AppState().modules: if module.num_weights > 0: if str(module) in unique_mod_names: raise NotImplementedError( - "There were two instances of the same module. Please " - "overwrite __str__() of one of the modules." + "There were two instances of the same module. Please overwrite __str__() of one of the " + "modules." 
) unique_mod_names.add(str(module)) if self._step_freq > -1: - filename = f"{module}-STEP-{self.step}.pt" + filename = f"{module}-STEP-{state['step']}.pt" else: - filename = f"{module}-EPOCH-{self.epoch_num}.pt" + filename = f"{module}-EPOCH-{state['epoch']}.pt" module.save_to(os.path.join(path, filename)) if self._step_freq > -1: - filename = f"trainer-STEP-{self.step}.pt" - self.action.save_state_to(f'{path}/{filename}') - self._saved_ckpts.append(f'-{self.step}.pt') + filename = f"trainer-STEP-{state['step']}.pt" + state.save_state_to(f"{path}/{filename}") + self._saved_ckpts.append(f"-{state['step']}.pt") else: - filename = f"trainer-EPOCH-{self.epoch_num}.pt" - self.action.save_state_to(f'{path}/{filename}') - self._saved_ckpts.append(f'-{self.epoch_num}.pt') + filename = f"trainer-EPOCH-{state['epoch']}.pt" + state.save_state_to(f"{path}/{filename}") + self._saved_ckpts.append(f"-{state['epoch']}.pt") if len(self._saved_ckpts) > self._ckpt2keep: for end in self._saved_ckpts[: -self._ckpt2keep]: for file in glob.glob(f'{path}/*{end}'): os.remove(file) self._saved_ckpts = self._saved_ckpts[-self._ckpt2keep :] - nemo.logging.info(f'Saved checkpoint: {path}/{filename}') + logging.info(f'Saved checkpoint: {path}/{filename}') - def __restore_from(self, path): + def __restore_from(self, path, state): if not os.path.isdir(path): if self._force_load: - raise ValueError( - "force_load was set to True for checkpoint " "callback but a checkpoint was not found." - ) - nemo.logging.warning(f"Checkpoint folder {path} not found!") + raise ValueError("force_load was set to True for checkpoint callback but a checkpoint was not found.") + logging.warning(f"Checkpoint folder {path} not found!") else: - nemo.logging.info(f"Restoring checkpoint from folder {path} ...") + logging.info(f"Found checkpoint folder {path}. Will attempt to restore checkpoints from it.") modules_to_restore = [] modules_to_restore_name = [] - for module in self.action.modules: + for module in AppState().modules: if module.num_weights > 0: modules_to_restore.append(module) modules_to_restore_name.append(str(module)) @@ -309,258 +470,58 @@ def __restore_from(self, path): module_checkpoints = get_checkpoint_from_dir(modules_to_restore_name, path) for mod, checkpoint in zip(modules_to_restore, module_checkpoints): - mod.restore_from(checkpoint, self.local_rank) - except (BaseException, ValueError) as e: + mod.restore_from(checkpoint, state["local_rank"]) + except (ValueError) as e: if self._force_load: raise ValueError( - "force_load was set to True for checkpoint callback" "but a checkpoint was not found." + "force_load was set to True for checkpoint callback but a checkpoint was not found." ) - nemo.logging.warning(e) - nemo.logging.warning(f"Checkpoint folder {path} present but did not restore") + logging.warning(e) + logging.warning( + f"Checkpoint folder {path} was present but nothing was restored. Continuing training from random " + "initialization." + ) return try: trainer_checkpoints = get_checkpoint_from_dir(["trainer"], path) - for tr, checkpoint in zip([self.action], trainer_checkpoints): - tr.restore_state_from(checkpoint) - except (BaseException, ValueError) as e: - nemo.logging.warning(e) - nemo.logging.warning("Trainer state wasn't restored") + state.restore_state_from(trainer_checkpoints[0]) + except (ValueError) as e: + logging.warning(e) + logging.warning( + "Trainer state such as optimizer state and current step/epoch was not restored. 
Pretrained weights" + " have still been restore and fine-tuning should continue fine." + ) return - def on_action_start(self): + def on_train_start(self, state): num_parameters = 0 unique_mod_names = set() - for module in self.action.modules: + for module in AppState().modules: if module.num_weights > 0: if str(module) in unique_mod_names: raise NotImplementedError( - "There were two instances of the same module. Please " - "overwrite __str__() of one of the modules." + "There were two instances of the same module. Please overwrite __str__() of one of the " + "modules." ) unique_mod_names.add(str(module)) num_parameters += module.num_weights - nemo.logging.info(f"Found {len(unique_mod_names)} modules with " f"weights:") + logging.info(f"Found {len(unique_mod_names)} modules with weights:") for name in unique_mod_names: - nemo.logging.info(f"{name}") - nemo.logging.info(f"Total model parameters: {num_parameters}") - self.__restore_from(path=self._load_from_folder) + logging.info(f"{name}") + logging.info(f"Total model parameters: {num_parameters}") + self.__restore_from(self._load_from_folder, state) - def on_iteration_end(self): - step = self.step + def on_step_end(self, state): + step = state["step"] if self._step_freq > 0 and step % self._step_freq == 0 and step > 0: - self.__save_to(path=self._folder) + self.__save_to(self._folder, state) - def on_action_end(self): + def on_train_end(self, state): if self._step_freq > 0 or self._epoch_freq > 0: - self.__save_to(path=self._folder) - - def on_epoch_start(self): - self._last_epoch_start = time.time() - - def on_epoch_end(self): - if self._epoch_freq > 0: - if self.global_rank is None or self.global_rank == 0: - run_time = time.time() - self._last_epoch_start - nemo.logging.info(f'Finished epoch {self.epoch_num} in {run_time}') - if (self.epoch_num + 1) % self._epoch_freq == 0: - self.__save_to(path=self._folder) - - -class EvaluatorCallback(ActionCallback): - """ - For callback documentation: please see - https://nvidia.github.io/NeMo/tutorials/callbacks.html - """ - - def __init__( - self, - eval_tensors, - user_iter_callback, - user_epochs_done_callback, - tb_writer=None, - tb_writer_func=None, - eval_step=1, - eval_epoch=None, - ): - # TODO: Eval_epoch currently does nothing - if eval_step is None and eval_epoch is None: - raise ValueError("Either eval_step or eval_epoch must be set. " f"But got: {eval_step} and {eval_epoch}") - if (eval_step is not None and eval_step <= 0) or (eval_epoch is not None and eval_epoch <= 0): - raise ValueError(f"Eval_step and eval_epoch must be > 0." f"But got: {eval_step} and {eval_epoch}") - super().__init__() - self._eval_tensors = eval_tensors - self._swriter = tb_writer - self._tb_writer_func = tb_writer_func - self._eval_frequency = eval_step - # will be passed to callbacks below - self._global_var_dict = {} - - # Callbacks - self.user_iter_callback = user_iter_callback - self.user_done_callback = user_epochs_done_callback - - @property - def eval_tensors(self): - return self._eval_tensors - - @property - def tb_writer_func(self): - return self._tb_writer_func - - @property - def swriter(self): - return self._swriter - - def on_epoch_end(self): - pass - - def on_iteration_end(self): - step = self.step - if step % self._eval_frequency == 0: - if self.global_rank == 0 or self.global_rank is None: - nemo.logging.info('Doing Evaluation ' + '.' 
* 30) - start_time = time.time() - self.action._eval(self._eval_tensors, self, step) - elapsed_time = time.time() - start_time - if self.global_rank == 0 or self.global_rank is None: - nemo.logging.info(f'Evaluation time: {elapsed_time} seconds') - - def on_action_end(self): - step = self.step - if self.global_rank == 0 or self.global_rank is None: - nemo.logging.info('Final Evaluation ' + '.' * 30) - start_time = time.time() - self.action._eval(self._eval_tensors, self, step) - elapsed_time = time.time() - start_time - if self.global_rank == 0 or self.global_rank is None: - nemo.logging.info(f'Evaluation time: {elapsed_time} seconds') - - def clear_global_var_dict(self): - self._global_var_dict = {} - - -# class InferenceCallback(ActionCallback): -# def __init__( -# self, -# eval_tensors, -# ): -# super().__init__() -# self._eval_tensors = eval_tensors -# # will be passed to callbacks below -# self._global_var_dict = {} -# self._swriter = None - -# @property -# def eval_tensors(self): -# return self._eval_tensors - -# def user_done_callback(self, global_var_dict): -# pass - -# def user_iter_callback(self, tensors, global_var_dict): -# """ Pushes evaluated tensors to var_dict """ -# for tensor in self._eval_tensors: -# key = tensor.unique_name -# self._global_var_dict[key] += tensors[key] - -# def clear_global_var_dict(self): -# for tensor in self._eval_tensors: -# self._global_var_dict[tensor.unique_name] = [] - - -_Policy = namedtuple('Policy', 'method start end') - - -class _Method(ABC): - """ Classes inherited from _Method are used for - ValueSetterCallback below - """ - - @abstractmethod - def __call__(self, step, total_steps): - pass - - -class _Const(_Method): - def __init__(self, value): - super().__init__() - - self.value = value - - def __call__(self, step, total_steps): - return self.value - - -class _Linear(_Method): - def __init__(self, a, b): - super().__init__() - self.a, self.b = a, b - - def __call__(self, step, total_steps): - return self.a + (step / (total_steps - 1)) * (self.b - self.a) - - -_Method.Const = _Const -_Method.Linear = _Linear - - -class ValueSetterCallback(ActionCallback): - Policy = _Policy - Method = _Method - - def __init__(self, module, arg_name, policies=None, total_steps=None, tb_writer=None): - super().__init__() - - if policies is None: - initial_value = getattr(module, arg_name) - policies = [_Policy(method=Const(initial_value), start=0.0, end=1.0)] - - new_policies = [] - for p in policies: - start, end = p.start, p.end - if isinstance(start, float): - start = int(start * total_steps) - if isinstance(end, float): - end = int(end * total_steps) - new_policies.append(_Policy(p.method, start, end)) - policies = new_policies - assert policies[0].start == 0 - assert policies[-1].end == total_steps - - self.module = module - self.arg_name = arg_name - self.policies = policies - self.total_steps = total_steps - self.tb_writer = tb_writer - - self.cur_i = 0 - - def on_iteration_start(self): - cur_policy = self.policies[self.cur_i] - if self.step < cur_policy.end: - step = self.step - cur_policy.start - total_steps = cur_policy.end - cur_policy.start - value = cur_policy.method(step, total_steps) - setattr(self.module, self.arg_name, value) - if self.tb_writer is not None: - class_name = self.module.__class__.__name__ - # name = f'param/{class_name}.{self.arg_name}' - name = f"param/{class_name}.{self.arg_name}" - self.tb_writer.add_scalar(name, value, self.step) - else: - self.cur_i += 1 - self.on_iteration_start() - - -class 
UnfreezeCallback(ActionCallback): - def __init__(self, modules, start_epoch=0): - super().__init__() - - self.modules = modules - self.start_epoch = start_epoch + self.__save_to(self._folder, state) - def on_iteration_start(self): - if self.epoch_num == self.start_epoch: - for m in self.modules: - m.unfreeze() + def on_epoch_end(self, state): + epoch = state["epoch"] + if self._epoch_freq > 0 and epoch % self._epoch_freq == 0 and epoch > 0: + self.__save_to(self._folder, state) diff --git a/nemo/core/deprecated_callbacks.py b/nemo/core/deprecated_callbacks.py new file mode 100755 index 000000000000..3ba8d4b88493 --- /dev/null +++ b/nemo/core/deprecated_callbacks.py @@ -0,0 +1,514 @@ +# ! /usr/bin/python +# -*- coding: utf-8 -*- + +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__all__ = [ + "ActionCallback", + "ModuleSaverCallback", + "SimpleLossLoggerCallback", + "EvaluatorCallback", + "ValueSetterCallback", + "UnfreezeCallback", + "WandbCallback", +] + +import datetime +import glob +import os +import sys +import time +from abc import ABC, abstractmethod +from collections import namedtuple + +from nemo.utils import logging +from nemo.utils.decorators import deprecated + +try: + import wandb + + _WANDB_AVAILABLE = True +except (ImportError, ModuleNotFoundError): + _WANDB_AVAILABLE = False + + +class ActionCallback(ABC): + """Abstract interface for callbacks. 
+ """ + + def __init__(self): + self._registered_tensors = {} + self._action = None + + @property + def step(self): + return self.action.step + + @property + def epoch_num(self): + return self.action.epoch_num + + @property + def registered_tensors(self): + return self._registered_tensors + + @property + def local_rank(self): + return self.action.local_rank + + @property + def global_rank(self): + return self.action.global_rank + + @property + def action(self): + return self._action + + @action.setter + def action(self, action_obj): + self._action = action_obj + + @property + def logger(self): + return logging + + def on_action_start(self): + pass + + def on_action_end(self): + pass + + def on_epoch_start(self): + pass + + def on_epoch_end(self): + pass + + def on_iteration_start(self): + pass + + def on_iteration_end(self): + pass + + +class ModuleSaverCallback(ActionCallback): + """ + For callback documentation: please see + https://nvidia.github.io/NeMo/tutorials/callbacks.html + """ + + @deprecated(version="0.12", explanation="The callback section of NeMo has been updated.") + def __init__( + self, save_modules_list, step_freq=1000, folder=None, checkpoints_to_keep=4, + ): + super().__init__() + self._save_modules_list = save_modules_list + self._folder = folder + self._step_freq = step_freq + self._ckpt2keep = checkpoints_to_keep + self._saved_ckpts = [] + + def on_iteration_end(self): + step = self.step + if ( + self._step_freq > 0 + and step % self._step_freq == 0 + and step > 0 + and (self.global_rank is None or self.global_rank == 0) + ): + for m in self._save_modules_list: + class_name = m.__class__.__name__ + uid = m.unique_instance_id + fn = f"{class_name}_{uid}-STEP-{step}.pt" + if self._folder is None: + file_name = fn + else: + file_name = os.path.join(self._folder, fn) + logging.info(f"Saving module {class_name} in {file_name}") + m.save_to(file_name) + logging.info("Saved.") + self._saved_ckpts.append(f'-{self.step}.pt') + if len(self._saved_ckpts) > self._ckpt2keep: + for end in self._saved_ckpts[: -self._ckpt2keep]: + for file in glob.glob(f'{self._folder}/*{end}'): + os.remove(file) + self._saved_ckpts = self._saved_ckpts[-self._ckpt2keep :] + + def on_action_end(self): + step = self.step + if self.global_rank is None or self.global_rank == 0: + for m in self._save_modules_list: + class_name = m.__class__.__name__ + uid = m.unique_instance_id + fn = f"{class_name}_{uid}-STEP-{step}.pt" + if self._folder is None: + file_name = fn + else: + file_name = os.path.join(self._folder, fn) + logging.info(f"Saving module {class_name} in {file_name}") + m.save_to(file_name) + logging.info("Saved.") + + +class SimpleLossLoggerCallback(ActionCallback): + """ + For callback documentation: please see + https://nvidia.github.io/NeMo/tutorials/callbacks.html + """ + + @deprecated(version="0.12", explanation="The callback section of NeMo has been updated.") + def __init__( + self, tensors, print_func=None, get_tb_values=None, log_to_tb_func=None, step_freq=25, tb_writer=None, + ): + + super().__init__() + if not isinstance(tensors, list): + tensors = [tensors] + self._tensors = tensors + self._print_func = print_func + self._get_tb_values = get_tb_values + self._log_to_tb_func = log_to_tb_func + self._step_freq = step_freq + self._swriter = tb_writer + self._start_time = None + self._last_epoch_start = None + self._last_iter_start = None + + @property + def tensors(self): + return self._tensors + + def on_action_start(self): + if self.global_rank is None or self.global_rank == 0: + 
logging.info("Starting .....") + self._start_time = time.time() + + def on_action_end(self): + if self.global_rank is None or self.global_rank == 0: + if self._swriter is not None: + self._swriter.close() + delta = datetime.timedelta(seconds=(time.time() - self._start_time)) + logging.info("Done in %s", delta) + + def on_epoch_start(self): + if self.global_rank is None or self.global_rank == 0: + logging.info(f"Starting epoch {self.epoch_num}") + self._last_epoch_start = time.time() + + def on_epoch_end(self): + if self.global_rank is None or self.global_rank == 0: + step = self.step + + delta = datetime.timedelta(seconds=(time.time() - self._last_epoch_start)) + logging.info(f"Finished epoch {self.epoch_num} in {delta}") + + if self._swriter is not None: + value = self.epoch_num + self._swriter.add_scalar('misc/epoch', value, step) + value = time.time() - self._last_epoch_start + self._swriter.add_scalar('misc/epoch_time', value, step) + + def on_iteration_start(self): + if self.global_rank is None or self.global_rank == 0: + self._last_iter_start = time.time() + + def on_iteration_end(self): + if self.global_rank is None or self.global_rank == 0: + step = self.step + if step % self._step_freq == 0: + tensor_values = [self.registered_tensors[t.unique_name] for t in self.tensors] + logging.info(f"Step: {step}") + if self._print_func: + self._print_func(tensor_values) + sys.stdout.flush() + if self._swriter is not None: + if self._get_tb_values: + tb_objects = self._get_tb_values(tensor_values) + for name, value in tb_objects: + value = value.item() + self._swriter.add_scalar(name, value, step) + if self._log_to_tb_func: + self._log_to_tb_func(self._swriter, tensor_values, step) + run_time = time.time() - self._last_iter_start + self._swriter.add_scalar('misc/step_time', run_time, step) + run_time = time.time() - self._last_iter_start + logging.info(f"Step time: {run_time} seconds") + + # To keep support in line with the removal of learning rate logging from inside actions, log learning + # rate to tensorboard. However it now logs ever self._step_freq as opposed to every step + if self._swriter is not None: + self._swriter.add_scalar('param/lr', self.learning_rate, step) + + +class EvaluatorCallback(ActionCallback): + """ + For callback documentation: please see + https://nvidia.github.io/NeMo/tutorials/callbacks.html + """ + + def __init__( + self, + eval_tensors, + user_iter_callback, + user_epochs_done_callback, + tb_writer=None, + tb_writer_func=None, + eval_step=1, + eval_epoch=None, + wandb_name=None, + wandb_project=None, + eval_at_start=True, + ): + # TODO: Eval_epoch currently does nothing + if eval_step is None and eval_epoch is None: + raise ValueError("Either eval_step or eval_epoch must be set. " f"But got: {eval_step} and {eval_epoch}") + if (eval_step is not None and eval_step <= 0) or (eval_epoch is not None and eval_epoch <= 0): + raise ValueError(f"Eval_step and eval_epoch must be > 0." 
f"But got: {eval_step} and {eval_epoch}") + super().__init__() + self._eval_tensors = eval_tensors + self._swriter = tb_writer + self._tb_writer_func = tb_writer_func + self._eval_frequency = eval_step + self._eval_at_start = eval_at_start + # will be passed to callbacks below + self._global_var_dict = {} + + # Callbacks + self.user_iter_callback = user_iter_callback + self.user_done_callback = user_epochs_done_callback + + # Weights and biases + self._wandb_project = wandb_project + self._wandb_name = wandb_name + + @property + def eval_tensors(self): + return self._eval_tensors + + @property + def tb_writer_func(self): + return self._tb_writer_func + + @property + def swriter(self): + return self._swriter + + def on_epoch_end(self): + pass + + def on_iteration_end(self): + if self.step == 0 and not self._eval_at_start: + return + if self.step % self._eval_frequency == 0: + if self.global_rank == 0 or self.global_rank is None: + logging.info('Doing Evaluation ' + '.' * 30) + start_time = time.time() + self.action._eval(self._eval_tensors, self, self.step) + elapsed_time = time.time() - start_time + if self.global_rank == 0 or self.global_rank is None: + logging.info(f'Evaluation time: {elapsed_time} seconds') + + def on_action_start(self): + if self.global_rank is None or self.global_rank == 0: + if self._wandb_name is not None or self._wandb_project is not None: + if _WANDB_AVAILABLE and wandb.run is None: + wandb.init(name=self._wandb_name, project=self._wandb_project) + elif _WANDB_AVAILABLE and wandb.run is not None: + logging.info("Re-using wandb session") + else: + logging.error("Could not import wandb. Did you install it (pip install --upgrade wandb)?") + logging.info("Will not log data to weights and biases.") + self._wandb_name = None + self._wandb_project = None + + def on_action_end(self): + step = self.step + if self.global_rank == 0 or self.global_rank is None: + logging.info('Final Evaluation ' + '.' 
* 30) + start_time = time.time() + self.action._eval(self._eval_tensors, self, step) + elapsed_time = time.time() - start_time + if self.global_rank == 0 or self.global_rank is None: + logging.info(f'Evaluation time: {elapsed_time} seconds') + + def clear_global_var_dict(self): + self._global_var_dict = {} + + def wandb_log(self, tensors_logged): + if self._wandb_name is not None and _WANDB_AVAILABLE: + wandb.log(tensors_logged, step=self.step) + + +_Policy = namedtuple('Policy', 'method start end') + + +class _Method(ABC): + """ Classes inherited from _Method are used for + ValueSetterCallback below + """ + + @abstractmethod + def __call__(self, step, total_steps): + pass + + +class _Const(_Method): + def __init__(self, value): + super().__init__() + + self.value = value + + def __call__(self, step, total_steps): + return self.value + + +class _Linear(_Method): + def __init__(self, a, b): + super().__init__() + self.a, self.b = a, b + + def __call__(self, step, total_steps): + return self.a + (step / (total_steps - 1)) * (self.b - self.a) + + +_Method.Const = _Const +_Method.Linear = _Linear + + +class ValueSetterCallback(ActionCallback): + Policy = _Policy + Method = _Method + + @deprecated(version="0.12", explanation="The callback section of NeMo has been updated.") + def __init__(self, module, arg_name, policies=None, total_steps=None, tb_writer=None): + super().__init__() + + if policies is None: + initial_value = getattr(module, arg_name) + policies = [_Policy(method=Const(initial_value), start=0.0, end=1.0)] + + new_policies = [] + for p in policies: + start, end = p.start, p.end + if isinstance(start, float): + start = int(start * total_steps) + if isinstance(end, float): + end = int(end * total_steps) + new_policies.append(_Policy(p.method, start, end)) + policies = new_policies + assert policies[0].start == 0 + assert policies[-1].end == total_steps + + self.module = module + self.arg_name = arg_name + self.policies = policies + self.total_steps = total_steps + self.tb_writer = tb_writer + + self.cur_i = 0 + + def on_iteration_start(self): + cur_policy = self.policies[self.cur_i] + if self.step < cur_policy.end: + step = self.step - cur_policy.start + total_steps = cur_policy.end - cur_policy.start + value = cur_policy.method(step, total_steps) + setattr(self.module, self.arg_name, value) + if self.tb_writer is not None: + class_name = self.module.__class__.__name__ + name = f"param/{class_name}.{self.arg_name}" + self.tb_writer.add_scalar(name, value, self.step) + else: + self.cur_i += 1 + self.on_iteration_start() + + +class UnfreezeCallback(ActionCallback): + @deprecated(version="0.12", explanation="The callback section of NeMo has been updated.") + def __init__(self, modules, start_epoch=0): + super().__init__() + + self.modules = modules + self.start_epoch = start_epoch + + def on_iteration_start(self): + if self.epoch_num == self.start_epoch: + for m in self.modules: + m.unfreeze() + + +class WandbCallback(ActionCallback): + """ + Log metrics to [Weights & Biases](https://docs.wandb.com/) + """ + + @deprecated(version="0.12", explanation="The callback section of NeMo has been updated.") + def __init__( + self, train_tensors=[], wandb_name=None, wandb_project=None, args=None, update_freq=25, + ): + """ + Args: + train_tensors: list of tensors to evaluate and log based on training batches + wandb_name: wandb experiment name + wandb_project: wandb project name + args: argparse flags - will be logged as hyperparameters + update_freq: frequency with which to log updates + """ + 
super().__init__() + + if not _WANDB_AVAILABLE: + logging.error("Could not import wandb. Did you install it (pip install --upgrade wandb)?") + + self._update_freq = update_freq + self._train_tensors = train_tensors + self._name = wandb_name + self._project = wandb_project + self._args = args + + def on_action_start(self): + if self.global_rank is None or self.global_rank == 0: + if _WANDB_AVAILABLE and wandb.run is None: + wandb.init(name=self._name, project=self._project) + if self._args is not None: + wandb.config.update(self._args) + elif _WANDB_AVAILABLE and wandb.run is not None: + logging.info("Re-using wandb session") + else: + logging.error("Could not import wandb. Did you install it (pip install --upgrade wandb)?") + logging.info("Will not log data to weights and biases.") + self._update_freq = -1 + + def on_iteration_end(self): + # log training metrics + if self.global_rank is None or self.global_rank == 0: + if self.step % self._update_freq == 0 and self._update_freq > 0: + tensors_logged = {t.name: self.registered_tensors[t.unique_name].cpu() for t in self._train_tensors} + # Always log learning rate + tensors_logged['LR'] = self.learning_rate + self.wandb_log(tensors_logged) + + def on_epoch_start(self): + if self.global_rank is None or self.global_rank == 0: + self._last_epoch_start = time.time() + + def on_epoch_end(self): + if self.global_rank is None or self.global_rank == 0: + # always log epoch num and epoch_time + epoch_time = time.time() - self._last_epoch_start + self.wandb_log({"epoch": self.epoch_num, "epoch_time": epoch_time}) + + def wandb_log(self, tensors_logged): + if _WANDB_AVAILABLE: + wandb.log(tensors_logged, step=self.step) diff --git a/nemo/core/nemo_model.py b/nemo/core/nemo_model.py new file mode 100644 index 000000000000..60dcbd4a4a17 --- /dev/null +++ b/nemo/core/nemo_model.py @@ -0,0 +1,242 @@ +# ! /usr/bin/python +# -*- coding: utf-8 -*- + +# Copyright (c) 2019-, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import random +import shutil +import string +import tarfile +from abc import abstractmethod +from os import path +from typing import Iterable + +from nemo.core.neural_factory import DeploymentFormat, OperationMode +from nemo.core.neural_graph import NeuralGraph +from nemo.core.neural_modules import ModuleType, NeuralModule +from nemo.utils import logging + +__all__ = ['NeMoModel'] + +NEMO_TMP_FOLDER = ".nemo_tmp" + + +class NeMoModel(NeuralModule): + """Abstract class representing NeMoModel. + A NeMoModel is a kind of neural module which contains other neural modules and logic inside. + It typically represents a whole neural network and requires only connections with data layer and loss + modules for training. + + The same NeMoModel could be used in training, evaluation or inference regimes. It should adjust itself + accordingly. 
+ """ + + def __call__(self, **kwargs): + if self._operation_mode == OperationMode.training or self.operation_mode == OperationMode.both: + return self.train_graph(**kwargs) + + else: + return self.eval_graph(**kwargs) + + @classmethod + @abstractmethod + def from_pretrained(cls, model_info, local_rank: int = 0, referesh_cache: bool = False) -> NeuralModule: + """ + Instantiates NeMoModel from pretrained checkpoint. Can do so from file on disk or from the NVIDIA NGC. + Args: + model_info: Either path to ".nemo" file or a valid NGC Model name + local_rank: on which GPU to instantiate. + referesh_cache: If set to True, then when fetching from clould, this will re-fetch the file + from clould even if it is already found in a cache locally. + + Returns: + NeMoModel instance + Raises: + NotImplemened exception when there is no pre-trained models on the cloud + """ + if isinstance(model_info, str) and model_info.endswith(".nemo"): + nemo_file_folder, to_delete = cls.__unpack_nemo_file(path2file=model_info) + configuration_file = path.join(nemo_file_folder, 'module.yaml') + instance = cls.import_from_config(config_file=configuration_file) + for module in instance.modules: + module_checkpoint = path.join(nemo_file_folder, module.__class__.__name__ + ".pt") + module.restore_from(path=module_checkpoint, local_rank=local_rank) + shutil.rmtree(to_delete) + return instance + else: + raise NotImplemented("Generic from_pretrained from cloud is not implemented") + + def save_to(self, output_file_name: str, output_folder: str = None, optimize_for_deployment: bool = False) -> str: + """ + Saves NeMoModel to .nemo file. This file will contain: + * weights of all NeuralModule instances inside the model + * Yaml file with configuration + * Yaml files with topologies and configuration of training and (if applicable) eval graphs + Args: + output_file_name: filename, something like nemomodel.nemo + output_folder: folder where to save output_file_name. If None (default) current folder will be used. + optimize_for_deployment: will optimize for deployment by trying to export modules to .onnx format and + skipping training graph. 
+ + Returns: + None + """ + + def __make_nemo_file_from_folder(filename, source_dir): + with tarfile.open(filename, "w:gz") as tar: + tar.add(source_dir, arcname=os.path.basename(source_dir)) + + if output_folder is None: + output_folder = "" + # create temporary folder first + rnd_string = ''.join(random.SystemRandom().choice(string.ascii_uppercase + string.digits) for _ in range(16)) + rnd_path = path.join(output_folder, f".{rnd_string}") + tmp_folder = path.join(rnd_path, NEMO_TMP_FOLDER) + # resulting_file is path to the resulting .nemo file + if output_file_name.endswith(".nemo"): + resulting_file = path.join(output_folder, output_file_name) + else: + resulting_file = path.join(output_folder, output_file_name + ".nemo") + if not path.exists(tmp_folder): + os.makedirs(tmp_folder) + try: + # create header file + main_configuration_file_name = "module.yaml" + train_graph_file_name = "train_graph.yaml" + eval_graph_file_name = "eval_graph.yaml" + + header_file_path = path.join(tmp_folder, 'header.content') + with open(header_file_path, 'w') as hf: + hf.write(f"Main module configuration: {main_configuration_file_name}") + hf.write(f"Train graph: {train_graph_file_name}") + if self.eval_graph is not None: + hf.write(f"Eval graph: {train_graph_file_name}") + + # Every NeMo model is a NeuralModule, exporiting its hyperparamers to .yaml + config_file_path = path.join(tmp_folder, main_configuration_file_name) + self.export_to_config(config_file=config_file_path) + + if self.train_graph is not None and not optimize_for_deployment: + config_file_path_train_graph = path.join(tmp_folder, train_graph_file_name) + self.train_graph.export_to_config(config_file_path_train_graph) + + if self.eval_graph is not None: + config_file_path_eval_graph = path.join(tmp_folder, eval_graph_file_name) + self.eval_graph.export_to_config(config_file_path_eval_graph) + + for module in self.modules: + module_name = module.__class__.__name__ + if optimize_for_deployment: + try: + module_checkpoint = module_name + ".onnx" + module._factory.deployment_export( + module=module, + output=path.join(tmp_folder, module_checkpoint), + d_format=DeploymentFormat.TRTONNX, + ) + except Exception as ex: + print(ex) + logging.warning(f"Did not convert {module_name} to .onnx") + module_checkpoint = module_name + ".pt" + module.save_to(path.join(tmp_folder, module_checkpoint)) + else: + module_checkpoint = module_name + ".pt" + module.save_to(path.join(tmp_folder, module_checkpoint)) + + __make_nemo_file_from_folder(resulting_file, tmp_folder) + logging.info(f"Exported model {self} to {resulting_file}") + except: + logging.error("Could not perform NeMoModel export") + finally: + shutil.rmtree(rnd_path) + pass + + @property + @abstractmethod + def modules(self) -> Iterable[NeuralModule]: + pass + + @property + @abstractmethod + def train_graph(self) -> NeuralGraph: + pass + + @property + @abstractmethod + def eval_graph(self) -> NeuralGraph: + pass + + def train(self): + """ + Sets model to the training mode + + Returns: + None + """ + self._operation_mode = OperationMode.training + for module in self.modules: + module.operation_mode = OperationMode.training + if module.type == ModuleType.trainable and hasattr(module, 'train'): + module.train() + + def eval(self): + """ + Sets model to the evaluation mode + + Returns: + None + """ + self._operation_mode = OperationMode.evaluation + for module in self.modules: + module.operation_mode = OperationMode.evaluation + if module.type == ModuleType.trainable and hasattr(module, 'eval'): + 
module.eval() + + @staticmethod + def __unpack_nemo_file(path2file: str, out_folder: str = None) -> str: + if not path.exists(path2file): + raise FileNotFoundError(f"{path2file} does not exist") + if out_folder is None: + out_folder = ''.join( + random.SystemRandom().choice(string.ascii_uppercase + string.digits) for _ in range(16) + ) + + tar = tarfile.open(path2file, "r:gz") + tar.extractall(path=out_folder) + tar.close() + return path.join(out_folder, NEMO_TMP_FOLDER), out_folder + + def get_weights(self): + raise NotImplemented() + + def set_weights( + self, name2weight, name2name_and_transform, + ): + raise NotImplemented() + + def tie_weights_with( + self, module, weight_names, name2name_and_transform, + ): + raise NotImplemented() + + def restore_from(self, path: str): + raise NotImplemented("Please use from_pretrained method for NeMoModels") + + def freeze(self, weights): + raise NotImplemented + + def unfreeze(self, weights): + raise NotImplemented diff --git a/nemo/core/neural_factory.py b/nemo/core/neural_factory.py index 0692ea46095c..ae1d97cd4c9b 100644 --- a/nemo/core/neural_factory.py +++ b/nemo/core/neural_factory.py @@ -17,25 +17,23 @@ __all__ = [ 'Backend', - 'ModelMode', + 'OperationMode', 'Optimization', 'DeviceType', - 'Actions', 'NeuralModuleFactory', 'DeploymentFormat', ] import random -from abc import ABC, abstractmethod from enum import Enum -from typing import List, Optional +from typing import List, Optional, Union import numpy as np import nemo -from ..utils import ExpManager -from .callbacks import ActionCallback, EvaluatorCallback -from .neural_types import * +from nemo.core.callbacks import ActionCallback, EvaluatorCallback, NeMoCallback +from nemo.core.neural_types import NmTensor +from nemo.utils import ExpManager, logging from nemo.utils.decorators import deprecated @@ -46,6 +44,8 @@ class DeploymentFormat(Enum): PYTORCH = 1 TORCHSCRIPT = 2 ONNX = 3 + TRTONNX = 4 + JARVIS = 5 class Backend(Enum): @@ -55,11 +55,12 @@ class Backend(Enum): NotSupported = 2 -class ModelMode(Enum): - """Training Mode or Evaluation/Inference""" +class OperationMode(Enum): + """Training or Inference (Evaluation) mode""" - train = 0 - eval = 1 + training = 0 + evaluation = 1 + both = 2 class Optimization(Enum): @@ -80,164 +81,6 @@ class DeviceType(Enum): AllGpu = 3 -class Actions(ABC): - """Basic actions allowed on graphs of Neural Modules""" - - def __init__(self, local_rank, global_rank, optimization_level=Optimization.mxprO0): - self._local_rank = local_rank - self._global_rank = global_rank - self._optim_level = optimization_level - self.step = None - self.epoch_num = None - - @property - def local_rank(self): - """Local rank during distributed execution. None if single GPU/CPU - - Returns: - (int) rank or worker or None if not in distributed model - """ - return self._local_rank - - @property - def global_rank(self): - """Global rank during distributed execution. None if single GPU/CPU - - Returns: - (int) rank or worker or None if not in distributed model - """ - return self._global_rank - - @abstractmethod - def train( - self, - tensors_to_optimize: List[NmTensor], - callbacks: Optional[List[ActionCallback]], - lr_policy=None, - batches_per_step=None, - stop_on_nan_loss=False, - ): - """This action executes training and (optionally) evaluation. - - Args: - tensors_to_optimize: which tensors to optimize. Typically this is - single loss tesnor. 
- callbacks: list of callback objects - lr_policy: function which should take (initial_lr, step, epoch) and - return learning rate - batches_per_step: number of mini-batches to process before one - optimizer step. (default: None, same as 1). Use this - to simulate larger batch sizes on hardware which could not fit - larger batch in memory otherwise. Effectively, this will make - "algorithmic" batch size per GPU/worker = batches_per_step* - batch_size - stop_on_nan_loss: (default: False) If set to True, the training - will stop if loss=nan. If set to False, the training will - continue, but the gradients will be zeroed before next - mini-batch. - - Returns: - None - """ - pass - - @abstractmethod - def infer(self, tensors: List[NmTensor]): - """This action executes inference. Nothing is optimized. - Args: - tensors: which tensors to evaluate. - - Returns: - None - """ - pass - - @abstractmethod - def save_state_to(self, path: str): - """ - Saves current state such as step, epoch and optimizer parameters - Args: - path: - - Returns: - - """ - pass - - @abstractmethod - def restore_state_from(self, path: str): - """ - Restores state such as step, epoch and optimizer parameters - Args: - path: - - Returns: - - """ - pass - - @abstractmethod - def create_optimizer(self, optimizer, things_to_optimize, optimizer_params): - """ - Creates an optimizer object to be use in the train() method. - - Args: - optimizer: Specifies which optimizer to use. - things_to_optimize: A list of neural modules or tensors to be - optimized. - optimizer_params: Specifies the parameters of the optimizer - - Returns: - Optimizer - """ - pass - - def _perform_on_iteration_start(self, callbacks): - # TODO: Most of these checks can be relaxed since we enforce callbacks - # to be a list of ActionCallback objects - if callbacks is not None and isinstance(callbacks, List) and len(callbacks) > 0: - for callback in callbacks: - callback.on_iteration_start() - - def _perform_on_iteration_end(self, callbacks): - if callbacks is not None and isinstance(callbacks, List) and len(callbacks) > 0: - for callback in callbacks: - callback.on_iteration_end() - - def _perform_on_action_start(self, callbacks): - if callbacks is not None and isinstance(callbacks, List) and len(callbacks) > 0: - for callback in callbacks: - callback.on_action_start() - - def _perform_on_action_end(self, callbacks): - if callbacks is not None and isinstance(callbacks, List) and len(callbacks) > 0: - for callback in callbacks: - callback.on_action_end() - - def _perform_on_epoch_start(self, callbacks): - if callbacks is not None and isinstance(callbacks, List) and len(callbacks) > 0: - for callback in callbacks: - callback.on_epoch_start() - - def _perform_on_epoch_end(self, callbacks): - if callbacks is not None and isinstance(callbacks, List) and len(callbacks) > 0: - for callback in callbacks: - callback.on_epoch_end() - - def _init_callbacks(self, callbacks): - if callbacks is not None and isinstance(callbacks, List) and len(callbacks) > 0: - for callback in callbacks: - callback.action = self - - def _update_callbacks( - self, callbacks=None, registered_tensors=None, - ): - # if self.local_rank is None or self.local_rank == 0: - if callbacks is not None and isinstance(callbacks, List) and len(callbacks) > 0: - for callback in callbacks: - callback._registered_tensors = registered_tensors - - def _str_to_opt_level(opt_str: str) -> Optimization: number = int(opt_str[1:]) if number not in Optimization._value2member_map_: @@ -328,7 +171,7 @@ def __init__( 
torch.backends.cudnn.benchmark = cudnn_benchmark if random_seed is not None and cudnn_benchmark: - raise ValueError("cudnn_benchmark can not be set to True" "when random_seed is not None.") + raise ValueError("cudnn_benchmark can not be set to True when random_seed is not None.") if random_seed is not None: torch.backends.cudnn.deterministic = True torch.backends.cudnn.benchmark = False @@ -336,6 +179,11 @@ def __init__( np.random.seed(random_seed) random.seed(random_seed) + # logging.info("Random seeds") + # logging.info("torch: %d", torch.initial_seed()) + # logging.info("numpy: %d", ) + # logging.info("random: %d", ) + if self._local_rank is not None: torch.distributed.init_process_group(backend="nccl", init_method="env://") @@ -421,8 +269,6 @@ def set_default_factory(cls, factory): @classmethod def reset_default_factory(cls): - if cls._DEFAULT: - cls._DEFAULT._exp_manager.reset_loggers() cls._DEFAULT = None @staticmethod @@ -433,86 +279,6 @@ def __name_import(name): mod = getattr(mod, comp) return mod - @deprecated(version=0.11) - def __get_pytorch_module(self, name, collection, params, pretrained): - # TK: "factory" is not passed as parameter anymore. - # params["factory"] = self - - if collection == "toys" or collection == "tutorials" or collection == "other": - constructor = NeuralModuleFactory.__name_import("nemo.backends.pytorch.tutorials." + name) - elif collection == "nemo_nlp": - constructor = NeuralModuleFactory.__name_import("nemo_nlp." + name) - if name == "BERT" and pretrained is True: - params["pretrained"] = True - elif collection == "nemo_asr": - constructor = NeuralModuleFactory.__name_import("nemo_asr." + name) - elif collection == "nemo_lpr": - constructor = NeuralModuleFactory.__name_import("nemo_lpr." + name) - elif collection == 'common': - constructor = NeuralModuleFactory.__name_import('nemo.backends.pytorch.common.' + name) - elif collection == "torchvision": - import torchvision.models as tv_models - import nemo.backends.pytorch.module_wrapper as mw - import torch.nn as nn - - if name == "ImageFolderDataLayer": - constructor = NeuralModuleFactory.__name_import("nemo.backends.pytorch.torchvision.data." + name) - instance = constructor(**params) - return instance - else: - _nm_name = name.lower() - if _nm_name == "resnet18": - input_ports = { - "x": NeuralType( - { - 0: AxisType(BatchTag), - 1: AxisType(ChannelTag), - 2: AxisType(HeightTag, 224), - 3: AxisType(WidthTag, 224), - } - ) - } - output_ports = {"output": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)})} - - pt_model = tv_models.resnet18(pretrained=pretrained) - num_classes = params.get("num_classes", None) - if num_classes is not None: - pt_model.fc = nn.Linear(512, params["num_classes"]) - return mw.TrainableNeuralModuleWrapper( - pt_nn_module=pt_model, input_ports_dict=input_ports, output_ports_dict=output_ports, - ) - elif _nm_name == "resnet50": - input_ports = { - "x": NeuralType( - { - 0: AxisType(BatchTag), - 1: AxisType(ChannelTag), - 2: AxisType(HeightTag, 224), - 3: AxisType(WidthTag, 224), - } - ) - } - output_ports = {"output": NeuralType({0: AxisType(BatchTag), 1: AxisType(ChannelTag)})} - - pt_model = tv_models.resnet50(pretrained=pretrained) - num_classes = params.get("num_classes", None) - if num_classes is not None: - pt_model.fc = nn.Linear(2048, params["num_classes"]) - return mw.TrainableNeuralModuleWrapper( - pt_nn_module=pt_model, input_ports_dict=input_ports, output_ports_dict=output_ports, - ) - else: - collection_path = "nemo.collections." + collection + "." 
+ name - constructor = NeuralModuleFactory.__name_import(collection_path) - if name == "BERT" and pretrained is True: - params["pretrained"] = True - - # TK: "placement" is not passed as parameter anymore. - # if "placement" not in params: - # params["placement"] = self._placement - instance = constructor(**params) - return instance - @deprecated(version=0.11) def get_module(self, name, collection, params, pretrained=False): """ @@ -531,21 +297,6 @@ def get_module(self, name, collection, params, pretrained=False): NeuralModule instance """ - # TK: "optimization_level" is not passed as parameter anymore. - # if params is not None and "optimization_level" in params: - # if params["optimization_level"] != self._optim_level: - # nemo.logging.warning( - # "Module's {0} requested optimization level {1} is" - # "different from the one specified by factory - {2}." - # "Using: {3} for this module".format( - # name, params["optimization_level"], self._optim_level, params["optimization_level"], - # ) - # ) - # else: - # if params is None: - # params = {} - # params["optimization_level"] = self._optim_level - if self._backend == Backend.PyTorch: return self.__get_pytorch_module(name=name, collection=collection, params=params, pretrained=pretrained,) else: @@ -558,10 +309,11 @@ def create_optimizer(self, optimizer, things_to_optimize, optimizer_params): def train( self, - tensors_to_optimize, + tensors_to_optimize=None, + training_graph=None, optimizer=None, optimization_params=None, - callbacks: Optional[List[ActionCallback]] = None, + callbacks: Optional[List[Union[ActionCallback, NeMoCallback]]] = None, lr_policy=None, batches_per_step=None, stop_on_nan_loss=False, @@ -575,6 +327,7 @@ def train( self.reset_trainer() return self._trainer.train( tensors_to_optimize=tensors_to_optimize, + training_graph=training_graph, optimizer=optimizer, optimization_params=optimization_params, callbacks=callbacks, @@ -598,7 +351,7 @@ def eval(self, callbacks: List[EvaluatorCallback]): ) def deployment_export( - self, module, output: str, d_format: DeploymentFormat, input_example=None, output_example=None, + self, module, output: str, d_format: DeploymentFormat, input_example=None, output_example=None ): """Exports Neural Module instance for deployment. @@ -609,29 +362,21 @@ def deployment_export( input_example: sometimes tracing will require input examples output_example: Should match inference on input_example """ - # Custom hacks: These will be put into a proper place soon - # We are checking type like this to avoid taking dependency on nemo_asr - if type(module).__name__ == "JasperEncoder": - # nemo.logging.warning(f"Module is JasperEncoder. 
We are removing" - # f"input and output length ports since they " - # f"are not needed for deployment") - # del module._input_ports['length'] - # del module._output_ports['encoded_lengths'] - - # disable masked convolutions - m_count = 0 - for m in module.modules(): - if type(m).__name__ == "MaskedConv1d": - m.use_mask = False - m_count += 1 - nemo.logging.warning(f"Turned off {m_count} masked convolutions") + if d_format == DeploymentFormat.JARVIS: + logging.info("Exporting model to Jarvis.") + module.deploy_to_jarvis(output=output) + logging.info(f"Exported to {output}") + return + + _inexample, _out_example = module._prepare_for_deployment() + + if input_example is not None: + _inexample = input_example + if output_example is not None: + _out_example = output_example return self._trainer.deployment_export( - module=module, - output=output, - d_format=d_format, - input_example=input_example, - output_example=output_example, + module=module, output=output, d_format=d_format, input_example=_inexample, output_example=_out_example, ) def infer( @@ -688,7 +433,6 @@ def clear_cache(self): """Helper function to clean inference cache.""" self._trainer.clear_cache() - @deprecated(version="future") def _get_trainer(self, tb_writer=None): if self._backend == Backend.PyTorch: constructor = NeuralModuleFactory.__name_import("nemo.backends.pytorch.PtActions") @@ -709,7 +453,7 @@ def _get_trainer(self, tb_writer=None): ) def get_trainer(self, tb_writer=None): if self._trainer: - nemo.logging.warning( + logging.warning( "The trainer instance was created during initialization of " "Neural factory, using the already created instance." ) @@ -732,7 +476,7 @@ def sync_all_processes(self, status=True): message on its own and exit """ if self._world_size == 1: - nemo.logging.info("sync_all_processes does nothing if there is " "one process") + logging.info("sync_all_processes does nothing if there is one process") return if self._backend == Backend.PyTorch: import torch @@ -740,7 +484,7 @@ def sync_all_processes(self, status=True): status_tensor = torch.cuda.IntTensor([status]) torch.distributed.all_reduce(status_tensor, op=torch.distributed.ReduceOp.MIN) if status_tensor.item() == 0: - nemo.logging.error("At least one process had a failure") + logging.error("At least one process had a failure") if status: raise ValueError( f"Process with global rank {self._global_rank} entered" diff --git a/nemo/core/neural_graph.py b/nemo/core/neural_graph.py new file mode 100644 index 000000000000..23f2b6e5a5be --- /dev/null +++ b/nemo/core/neural_graph.py @@ -0,0 +1,1081 @@ +# -*- coding: utf-8 -*- + +# ============================================================================= +# Copyright (c) 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================= + +__all__ = [ + 'NeuralGraph', +] + +from collections import OrderedDict, namedtuple +from os import path +from typing import Any, Dict, List, Optional, Union + +from ruamel.yaml import YAML + +from .neural_modules import OperationMode +from nemo.backends import get_state_dict, load, save, set_state_dict +from nemo.core.neural_interface import NeuralInterface +from nemo.core.neural_modules import ModuleType, NeuralModule +from nemo.core.neural_types import NeuralPortNameMismatchError, NeuralType, NmTensor +from nemo.package_info import __version__ as nemo_version +from nemo.utils import logging +from nemo.utils.neural_graph.connection import Connection, StepModulePort +from nemo.utils.neural_graph.graph_inputs import GraphInputs +from nemo.utils.neural_graph.graph_outputs import GraphOutputs + +YAML = YAML(typ='safe') + + +class NeuralGraph(NeuralInterface): + """ + Neural Graph class stores dynamically defined graphs of connected Neural Modules. + """ + + def __init__(self, operation_mode: OperationMode = OperationMode.both, name: Optional[str] = None): + """ + Constructor. Initializes graph variables. + + Args: + operation_mode: Graph operation mode, that will be propagated along modules during graph creation. + [training | eval | both] (DEFAULT: both) + name: Name of the graph (optional) + """ + # Initialize the inferface. + super().__init__() + + # Register graph. + self._name = self._app_state.register_graph(self, name) + + # Store name and operation mode. + self._operation_mode = operation_mode + + # "Modules" - list of modules constituting "nodes" in a given graph. + self._modules = {} + + # All tensors produced within this graph (dict of dicts). + # This stores "all output tensors" dictionary, where the key is the name of "producer" module, + # and the value contains a dictionary of all tensors produced by it. + self._all_tensors = {} + + # "Steps": order of the execution of modules in a graph. + self._steps = OrderedDict() + + # Bound inputs. + self._inputs = GraphInputs() + + # Bound outputs. + self._outputs = GraphOutputs(self._all_tensors) + + # Flag indicating whether the "default" output ports/tensors will be automatically bound. + self.default_output_binding = True + + def __call__(self, **kwargs): + """ + This method "nests" one existing neural graph into another one. + Also checks if all inputs were provided and properly connects them. + + Args: + kwargs: keyword arguments containing dictionary of (input_port_name, port_content). + """ + # Test operation modes of the nested graphs. + outer_mode = self._app_state.active_graph.operation_mode + inner_mode = self.operation_mode + + if inner_mode == OperationMode.evaluation and outer_mode == OperationMode.training: + raise TypeError("Cannot nest 'inference' graph into 'training'") + + if inner_mode == OperationMode.training and outer_mode == OperationMode.evaluation: + raise TypeError("Cannot nest 'training' graph into 'inference'") + + if inner_mode == OperationMode.training and outer_mode == OperationMode.both: + raise TypeError("Cannot nest 'training' graph into 'both'") + + if inner_mode == OperationMode.evaluation and outer_mode == OperationMode.both: + raise TypeError("Cannot nest 'inference' graph into 'both'") + + # Check inputs: iterate through all inputs passed to the "self". + for port_name, port_content in kwargs.items(): + # Make sure that passed arguments correspond to input port names. 
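# --- Editor's note: illustrative sketch, not part of this diff ---------------
# Reading the four mode checks above: an inner graph may only be nested into an
# outer graph of the same operation mode, while an inner graph created with
# OperationMode.both can be nested anywhere. A hypothetical example:
#
#     from nemo.core.neural_factory import OperationMode
#     from nemo.core.neural_graph import NeuralGraph
#
#     # reusable sub-graph intended to work in both regimes
#     inner = NeuralGraph(operation_mode=OperationMode.both, name="encoder_graph")
#     # ... build `inner` and its bound input ports here (omitted) ...
#     # Nesting it into a training graph is allowed; keyword arguments are matched
#     # against the inner graph's bound input ports (port and tensor names are hypothetical).
#     with NeuralGraph(operation_mode=OperationMode.training) as outer:
#         encoded = inner(input_signal=audio_tensor)   # `audio_tensor` is a placeholder NmTensor
# ------------------------------------------------------------------------------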
+ if port_name not in self.input_ports.keys(): + raise NeuralPortNameMismatchError(port_name) + + # "Nest" this graph into an active graph. + results = self._app_state.active_graph.__nest(self, kwargs) + + # Return output tensors. + return results + + def __nest(self, inner_graph: 'NeuralGraph', inner_graph_args): + """ + Method nests (copies) a graph: modules, steps, topology (tensors). + + Args: + inner_graph: Graph to be copied (will be "nested" in this (self) graph). + inner_graph_args: inputs passed to the graph call. + """ + # Remember the number of "already present steps". + step_bump = len(self.steps) + + # "Copy" the modules from nested graph. + for key, module in inner_graph.modules.items(): + # Check if module with that name already exists. + # TODO: Uncomment when we will refactor all examples so training/validation graphs won't be added + # to the "default" graph. + # if key in self._modules.keys(): + # raise KeyError("Neural Graph already contains a module named {}".format(module.name)) + self._modules[key] = module + + # Next we should copy the topography - i.e. produce "real" copies of tensors. + # In fact, instead of copying, we will produce them, following: + # - the execution order defined in "steps" + # - connectivity defined in tensor' consumers-ports + # (so the same logic that will be used in graph deserialization) + + # So let us first serialize the connections of the nested graph. + # Create a list: (producer.port -> consumer.port) + inner_connections = [] + for tensors in inner_graph.tensors.values(): + for t in tensors.values(): + inner_connections.extend(t.connections()) + + # We need to disable the binding of "defeault" ports on per module basis - we will "manually" produce + # them only for ports that are already indicated as the "bound" ones in the inner graph. + self.default_output_binding = False + + # Now "copy" graph execution order and topology by actually executing each step of the nested graph. + for step_number, module_name in inner_graph.steps.items(): + # Both module and step will be added by the modules' call(). + + # Get the module. + module = inner_graph._modules[module_name] + + # Produce list of arguments that will be passed to a given modules. + module_args = {} + # Do it by: + # - harvesing input port names of a given module, + # - checking if the input was not bound (in the inner graph), + # - checking if we have already tensors leading to that input (in outer graph). + for input_port_name in module.input_ports.keys(): + # Check if this port was bound in the inner graph. + key = inner_graph.inputs.has_binding(step_number, input_port_name) + # If so, then we must pass whatever was passed to that port in the list of arguments. + if key is not None: + module_args[input_port_name] = inner_graph_args[key] + # As a result, the "module" call() will bind this input! + continue + + # Else: find a tensor that should be passed to the given module's input. + # Search for producer/port that we should use. + for connection in inner_connections: + if ( + connection.consumer.step_number == step_number + and connection.consumer.module_name == module_name + and connection.consumer.port_name == input_port_name + ): + # Got the connection! + bumped_step = connection.producer.step_number + step_bump + # producer_name = connection.producer.module_name + producer_port_name = connection.producer.port_name + break + # import pdb;pdb.set_trace() + # Now, the tensor is already produced in outer (i.e. this) graph! 
+ module_args[input_port_name] = self.tensors[bumped_step][producer_port_name] + + # Ok, now we have all keyword arguments. We can call() the module. + # This will collect all the produced output tensors and add them to this graph. + module(**module_args) + + # At that point we have all modules, steps and tensors added to outer (self) graph. + # Now we have to prepare the outputs. + + # This part is different from Neural Module. + # Now the goal is NOT to create NEW "tensors", but to return the BOUND ones! + # Still, those must be bound in the outer (active) graph, but using port names from the inner (nested) graph. + + # Get list of "the adequate output tensors". + output_tensors = {} + # Iterate through outputs of the inner graph. + for key, tensor in inner_graph.output_tensors.items(): + # Find the tensors within this (outer) graph that are outputs by the same producer-port. + bumped_step = tensor.producer_step_number + step_bump + # producer_name = tensor.producer_name + producer_port_name = tensor.name + # Get adequate tensor from "outer graph" (self). + output_tensors[key] = self.tensors[bumped_step][producer_port_name] + + if len(output_tensors) == 1: + # Return a single tensor. + key = list(output_tensors)[0] + results = output_tensors[key] + + # Bind the "default" output ports of the inner graph as "default" output ports of this graph. + # Call the bind() method of bound_outputs directly, as we already have the tensors in our graph. + # But: Use output port name of the inner graph! + self.outputs.bind([results], [key]) + + else: + # Create a named tuple type enabling to access outputs by attributes (e.g. out.x). + output_class_name = f'{self.__class__.__name__}Output' + result_type = namedtuple(typename=output_class_name, field_names=output_tensors.keys()) + + # Return the bound output tensors. + results = result_type(*output_tensors.values()) + + # Bind the "default" output ports of the inner graph as "default" output ports of this graph. + # Call the bind() method of bound_outputs directly, as we already have the tensors in our graph. + # But: Use output port name of the inner graph! + self.outputs.bind(output_tensors.values(), output_tensors.keys()) + + # Ok, now we can turn automatic binding on. + self.default_output_binding = True + + # Return the results. + return results + + def record_step(self, module: NeuralModule): + """ + Records the operation (the module to be executed) on a list. + + Args: + module: Neural modules added to a given graph. + + Returns: + Step number. + """ + # The solution allows loops in the graph. + # This also means that module with that name can already be present in the graph. + if module.name in self._modules.keys(): + # Check if this is the same module. + if self._modules[module.name] is not module: + raise KeyError("Neural Graph already contains a different module with name `{}`!".format(module.name)) + + else: + # Add module to list of modules. + self._modules[module.name] = module + + # Add step - store the module name. + step_number = len(self._steps) + self._steps[step_number] = module.name + + # Return the current step number. + return step_number + + @property + def step_number(self) -> int: + """ + Returns: + The current step number. + """ + return len(self._steps) - 1 + + def bind_outputs(self, tensors_list: Union[NmTensor, List[NmTensor]]): + """ + Binds the output tensors. + + Args: + tensors_list: A single tensor OR a List of tensors to be bound. + """ + # Handle both single port and lists of ports to be bound. 
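# --- Editor's note: illustrative sketch, not part of this diff ---------------
# What record_step()/bind_outputs() mean in practice: while a graph is active
# (e.g. inside a `with` block), every NeuralModule call is recorded as a step and
# its output tensors are bound as the graph's default outputs. Module and port
# names below are hypothetical.
#
#     from nemo.core.neural_factory import OperationMode
#     from nemo.core.neural_graph import NeuralGraph
#
#     with NeuralGraph(operation_mode=OperationMode.training, name="training_graph") as g:
#         audio, audio_len, transcript, transcript_len = data_layer()       # step 0
#         log_probs = asr_model(input_signal=audio, length=audio_len)       # step 1
#         loss = ctc_loss(log_probs=log_probs, targets=transcript,          # step 2
#                         target_length=transcript_len)
#     # g.steps now maps step numbers to module names, and `loss` is among the
#     # graph's bound output tensors.
# ------------------------------------------------------------------------------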
+ if type(tensors_list) is not list: + tensors_list = [tensors_list] + + # Add tensors to list of list of tensors. + for tensor in tensors_list: + # Add tensor to "all" tensors dictionary. + step_number = tensor.producer_step_number + if step_number not in self._all_tensors.keys(): + self._all_tensors[step_number] = {} + + port_name = tensor.name + # Add tensor. + self._all_tensors[step_number][port_name] = tensor + + # Bind the tensors as graph outputs. + if self.default_output_binding: + self.outputs.bind(tensors_list) + + @property + def inputs(self) -> GraphInputs: + """ + Returns: + Graph input. + """ + return self._inputs + + @property + def input_ports(self) -> Dict[str, NeuralType]: + """ + Returns definitions of graph input ports (dict of Neural Types). + + .. note:: + This method actually returns an immutable dictionary with port types (like Neural Modules). + In order to get access to actual graph inputs please call the inputs() method. + + Returns: + Graph input ports definitions. + """ + return self._inputs.definitions + + @property + def outputs(self) -> GraphOutputs: + """ + Returns graph outputs. + + Returns: + Graph outputs. + """ + return self._outputs + + @property + def output_ports(self) -> Dict[str, NeuralType]: + """ + Returns definitions of module output ports (dict of Neural Types). + + .. note:: + This method actually returns an immutable dictionary with port types (like Neural Modules). + In order to get access to actual graph outpus please call the outputs() method. + + Returns: + Graph output ports definitions. + + """ + return self._outputs.definitions + + @property + def output_tensors(self) -> Dict[str, NmTensor]: + """ + Returns: + Fraph output tensors. + """ + return self._outputs.tensors + + @property + def modules(self) -> Dict[str, NeuralModule]: + """ Returns modules. """ + return self._modules + + def __getitem__(self, key) -> NeuralModule: + """ Returns module given its name (name of the variable). + + Args: + key: Name of the variable. + + Raises: + KeyError: Neural Graph doesn't contain a module with a given name (key). + """ + if key not in self._modules.keys(): + raise KeyError("Neural Graph doesn't contain a module named {}".format(key)) + return self._modules[key] + + def __len__(self) -> int: + """ + Returns: + The number of modules (vertices) in a given graph. + """ + return len(self._modules) + + @property + def steps(self) -> Dict[int, str]: + """ + Returns: + Dictionary [steps_number, module_name] + """ + return self._steps + + @property + def tensors(self): + """ + Property returning a (double) dictionary of all output tensors. + + Returns: + Dictionary of tensors in the format [module_name][output_port_name]. + """ + return self._all_tensors + + @property + def tensor_list(self) -> List[NmTensor]: + """ + Property returning output tensors by extracting them on the fly from the bound outputs. + + Returns: + List of tensors. + """ + tensor_list = [] + # Get tensors by acessing the producer-ports. + for tensors_per_module in self._all_tensors.values(): + for tensor in tensors_per_module.values(): + # Add it to the list. + tensor_list.append(tensor) + # Return the result. + return tensor_list + + @property + def operation_mode(self) -> OperationMode: + """ + Returns: + Operation mode. + """ + return self._operation_mode + + def __enter__(self) -> 'NeuralGraph': + """ + Activates this graph. + + Returns: + The graph object. 
+ """ + self._app_state.active_graph = self + return self + + def __exit__(self, exc_type, exc_value, exc_traceback): + """ + Deactivates the current graph. + """ + self._app_state.active_graph = None + + def activate(self): + """ + Activates this graph. + """ + self._app_state.active_graph = self + + def deactivate(self): + """ + Deactivates the current graph. + """ + self._app_state.active_graph = None + + def export_to_config(self, config_file: str): + """ + Exports the neural graph to a file. + + Args: + config_file: Name (and path) of the config file (YML) to be written to. + """ + # Greate an absolute path. + abs_path_file = path.expanduser(config_file) + + # Serialize the graph. + to_export = self.serialize() + + # All parameters are ok, let's export. + with open(abs_path_file, 'w') as outfile: + YAML.dump(to_export, outfile) + + logging.info( + "Configuration of graph `{}` ({}) exported to '{}'".format(self.name, type(self).__name__, abs_path_file) + ) + + def serialize(self) -> Dict[str, Any]: + """ + Method serializes the whole graph. + + Returns: + Dictionary containing description of the whole graph. + """ + # Create a dictionary representing the serialized object. + serialized_graph = {} + + # Add "header" with module "specification". + serialized_graph["header"] = self.__serialize_header() + + # Add modules. + serialized_graph["modules"] = self.__serialize_modules() + + # Add steps. + serialized_graph["steps"] = self.__serialize_steps() + + # Add connectinos. + serialized_graph["connections"] = self.__serialize_connections() + + # Serialize graph (bound) inputs. + serialized_graph["inputs"] = self._inputs.serialize() + + # Serialize graph (bound) outputs. + serialized_graph["outputs"] = self._outputs.serialize() + + # Return the dictionary. + return serialized_graph + + def __serialize_header(self) -> Dict[str, Any]: + """ + Private method responsible for serializing the graph header. + + Returns: + Dictionary containing description of the whole graph. + """ + # Generate full_spec of the class. + full_spec = str(self.__module__) + "." + str(self.__class__.__qualname__) + header = {"nemo_core_version": nemo_version, "full_spec": full_spec} + # Add operation mode. + if self._operation_mode == OperationMode.training: + header["operation_mode"] = "training" + elif self._operation_mode == OperationMode.evaluation: + header["operation_mode"] = "inference" + else: + header["operation_mode"] = "both" + # Return header. + return header + + def __serialize_modules(self) -> Dict[str, Any]: + """ + Private method responsible for serializing the modules present in the graph. + + Returns: + Dictionary containing description of all graph modules. + """ + serialized_modules = {} + for name, module in self._modules.items(): + serialized_modules[name] = module.serialize() + return serialized_modules + + def __serialize_steps(self): + """ + Private method responsible for serializing the steps (order of module executions). + + Returns: + Dictionary containing description of the steps. + """ + serialized_steps = {} + for no, module_name in self._steps.items(): + serialized_steps[no] = module_name + return serialized_steps + + def __serialize_connections(self) -> Dict[str, Any]: + """ + Private method responsible for serializing the connections in the graph. + + Returns: + List containing "connections" between modules. + """ + serialized_connections = [] + # Iterate through "tensor modules". + for tensors in self._all_tensors.values(): + # Iterate through "tensor output ports". 
+ for tensor in tensors.values(): + # "Transform" tensor to the list of connections. + for c in tensor.connections(): + # Serialize! + source = str(c.producer.step_number) + "." + c.producer.module_name + "." + c.producer.port_name + target = str(c.consumer.step_number) + "." + c.consumer.module_name + "." + c.consumer.port_name + ntype_str = str(tensor.ntype) + serialized_connections.append(source + "->" + target + " | " + ntype_str) + return serialized_connections + + @classmethod + def import_from_config( + cls, + config_file: str, + reuse_existing_modules: bool = False, + overwrite_params: Dict[str, Any] = {}, + name: Optional[str] = None, + ) -> 'NeuralGraph': + """ + Class method importing the neural graph from the configuration file. + Raises an ImportError exception when config file is invalid. + + Args: + config_file: path (absolute or relative) and name of the config file (YML) + reuse_existing_modules: If the modules with (name, type, init_params) are already created, import will + connect to them instead of creating new instances. + overwrite_params: Dictionary containing parameters that will be added to or overwrite (!) the default + parameters loaded from the configuration file + name: Name of the new graph (optional, DEFAULT: NONE) + Returns: + Instance of the created NeuralGraph object. + """ + logging.info("Loading configuration of a new Neural Graph from the `{}` file".format(config_file)) + + # Validate the content of the configuration file (its header). + loaded_config = cls.__validate_config_file(config_file) + # TODO: overwrite params? + + # "Deserialize" the graph. + new_graph = cls.deserialize(loaded_config, reuse_existing_modules, name) + + # Return the object. + return new_graph + + @classmethod + def __validate_config_file(cls, config_file: str): + """ + Class method validating whether the config file has a proper content (sections, specification etc.). + Raises an ImportError exception when config file is invalid or + incompatible (when called from a particular class). + + Args: + config_file: path (absolute or relative) and name of the config file (YML) + Returns: + A loaded configuration file (dictionary). + """ + # Greate an absolute path. + abs_path_file = path.expanduser(config_file) + + # Open the config file. + with open(abs_path_file, 'r') as stream: + loaded_config = YAML.load(stream) + + # Check sections. + for section_name in ["header", "modules", "steps", "connections", "inputs", "outputs"]: + if section_name not in loaded_config.keys(): + raise ImportError( + "The loaded config `{}` doesn't contain the required `{}` section".format( + config_file, section_name + ) + ) + + # Parse the "full specification". + spec_list = loaded_config["header"]["full_spec"].split(".") + + # Check if config contains definition of Neural Graph. + if spec_list[-1] != "NeuralGraph": + txt = "The loaded file `{}` contains configuration of ".format(config_file) + txt = txt + "`{}` thus cannot be used for instantiation of Neural Graph".format(spec_list[-1]) + raise ImportError(txt) + + # Success - return the loaded configuration. + return loaded_config + + @classmethod + def deserialize( + cls, configuration: Dict[str, Any], reuse_existing_modules: bool = False, name: Optional[str] = None + ) -> 'NeuralGraph': + """ + Class method creating a graph instance by deserializing the provided configuratino. + + Args: + configuration: Dictionary containing serialized graph. 
+ reuse_existing_modules: If the modules with (name, type, init_params) are already created, import will + connect to them instead of creating new instances. + Returns: + Instance of the created NeuralGraph object. + """ + # Deserialize header and get object class. + operation_mode = cls.__deserialize_header(configuration["header"]) + + # Create the graph instance. + new_graph = NeuralGraph(operation_mode=operation_mode, name=name) + logging.info( + "Instantiated a new Neural Graph named `{}` with mode `{}`".format( + new_graph.name, new_graph.operation_mode + ) + ) + # Deserialize modules. + modules = new_graph.__deserialize_modules(configuration["modules"], reuse_existing_modules) + + # Deserialize steps. + steps = new_graph.__deserialize_steps(configuration["steps"]) + + # Deserialize the connections between modules. + connections = new_graph.__deserialize_connections(configuration["connections"], modules) + + # Deserialize input bindings - return it in an external entity. + inputs = GraphInputs.deserialize(configuration["inputs"], modules) + + # Deserialize "manual" output bindings. + new_graph._outputs.deserialize(configuration["outputs"], modules) + + # Now we have to execute the graph, following the steps and connections. + new_graph.__execute_and_create_tensors(steps, modules, connections, inputs) + + # Return the graph instance. + return new_graph + + @classmethod + def __deserialize_header(cls, serialized_header: Dict[str, Any]): + """ + Private class method deserializing the header and extracts the general information. + + Args: + serialized_header: Dictionary containing graph header. + Returns: + Operation mode. + """ + # Parse the "full specification" - do not need that now. + # spec_list = serialized_header["full_spec"].split(".") + + # Get operation mode. + if serialized_header["operation_mode"] == "training": + operation_mode = OperationMode.training + elif serialized_header["operation_mode"] == "inference": + operation_mode = OperationMode.evaluation + else: + operation_mode = OperationMode.both + + # Return the mode. + return operation_mode + + def __deserialize_modules(self, serialized_modules: Dict[str, Any], reuse_existing_modules: bool): + """ + Private method deserializing the modules present in the graph. + + Args: + serialized_modules: Dictionary containing graph modules. + reuse_existing_modules: If True, won create a new module when a module with a given name exists. + + Returns: + Dictionary of modules. + + Raises: + KeyError: A module with name already exists (if reuse_existing_modules is set to False). + """ + modules = {} + for name, module_params in serialized_modules.items(): + # Check if module already exists. + if self._app_state.modules.has(name): + # Check if we can reuse the existing modules. + if reuse_existing_modules: + modules[name] = self._app_state.modules[name] + else: + raise KeyError("A module with name `{}` already exists!".format(name)) + else: + # Ok, create a new module. + modules[name] = NeuralModule.deserialize(module_params) + # Ok, done. + return modules + + def __deserialize_steps(self, serialized_steps: Dict[str, Any]): + """ + Private method deserializing the steps (order of module executions). + + Args: + serialized_steps: Dictionary containing serialized steps. + Returns: + Odered dict with steps. + """ + steps = OrderedDict() + for i in range(len(serialized_steps)): + steps[i] = serialized_steps[i] + # Ok, done. 
+ return steps + + def __deserialize_connections(self, serialized_connections: Dict[str, Any], modules: Dict[str, NeuralModule]): + """ + Private method deserializing the connections in the graph. + + Args: + serialized_steps: Dictionary containing serialized connections. + modules: List of modules. + Returns: + List of connections, in a format enabling graph traversing. + """ + connections = [] + # Deserialize connections one by one. + for c in serialized_connections: + # Deserialize! + [producer, consumer_type] = c.split("->") + [consumer, ntype_str] = consumer_type.split(" | ") + [producer_step, producer_name, producer_port_name] = producer.split(".") + [consumer_step, consumer_name, consumer_port_name] = consumer.split(".") + producer_mp = StepModulePort(int(producer_step), producer_name, producer_port_name) + consumer_mp = StepModulePort(int(consumer_step), consumer_name, consumer_port_name) + # Get tensor type. + ntype = modules[producer_name].output_ports[producer_port_name] + # Validate if neural type is ok. + assert ntype_str == str(ntype) + + # Add connection. + connections.append(Connection(producer_mp, consumer_mp, ntype)) + # Ok, done. + return connections + + def __execute_and_create_tensors(self, steps, modules, connections, inputs): + """ + Method creates (internal) tensors of the graph by executing it following the order and using + the provided connections and inputs. + + Args: + steps: List of steps to be executed. + modules: List of modules. + connections: List of connections. + inputs: List of "bound inputs" + """ + # Activate this graph, so all the tensors will be added to this ! + self.activate() + + # We need to disable the binding of "defeault" ports on per module basis. + # We will "manually" produce (e.g. deserialize) them outside of this function. + self.default_output_binding = False + + # Now "copy" graph execution order and topology by actually executing each step of the nested graph. + for step, module_name in steps.items(): + # Both module and step will be added by the modules' call(). + + # Get the module. + module = modules[module_name] + + # Produce list of arguments that will be passed to a given module. + module_args = {} + # Do it by: + # - harvesing input port names of a given module, + # - checking if the input was not bound (in the inner graph), + # - checking if we have already tensors leading to that input (in outer graph). + for input_port_name in module.input_ports.keys(): + # Check if this port was bound in the inner graph. + key = inputs.has_binding(step, input_port_name) + + # import pdb;pdb.set_trace() + # If so, then we must pass the binding! + if key is not None: + # Copy the port "definition" (i.e. is NeuralType) using the same port name. + self.inputs[key] = inputs[key] + + # Pass this object to module input argument. + module_args[input_port_name] = self.inputs[key] + + # Else: find a tensor that should be passed to the given module's input. + else: + # Search for producer/port that we should use. + for connection in connections: + if ( + connection.consumer.step_number == step + and connection.consumer.module_name == module_name + and connection.consumer.port_name == input_port_name + ): + # Got the connection! + producer_step_number = connection.producer.step_number + # producer_name = connection.producer.module_name + producer_port_name = connection.producer.port_name + break + # Now, the tensor is already produced in outer (i.e. this) graph! 
+ module_args[input_port_name] = self.tensors[producer_step_number][producer_port_name] + # End: for + + # Ok, now we have all keyword arguments. We can call() the module. + # This will collect all the produced output tensors and add them to this graph. + module(**module_args) + + # At that point we have all modules, steps and tensors added to outer (self) graph. + # Now we have to prepare the outputs. + + # Deactivate graph. + self.deactivate() + + # Ok, now we can turn automatic binding on. + self.default_output_binding = True + + def summary(self) -> str: + """ + Returns: + A nice, full graph summary. + """ + # Line "decorator". + desc = "\n" + 113 * '=' + "\n" + # 1. general information. + desc += "The `{}` Neural Graph [{}]".format(self.name, self.operation_mode) + if self.is_complete(): + desc += " [COMPLETE]:\n" + else: + desc += " [INCOMPLETE]:\n" + + # 2. modules. + desc += " * Modules ({}):\n".format(len(self._modules)) + for key, module in self._modules.items(): + if module.type == ModuleType.trainable and module.is_frozen(): + desc += " * `{}` ({}) [FROZEN]\n".format(key, type(module).__name__) + else: + desc += " * `{}` ({})\n".format(key, type(module).__name__) + + # 3. steps. + desc += " * Steps ({}):\n".format(len(self._steps)) + for num, module in self._steps.items(): + desc += " {}. {}\n".format(num, module) + + # 4. connections. + connections = self.__serialize_connections() + desc += " * Connections ({}):\n".format(len(connections)) + # if len(connections) == 0: + # desc += " -\n" + for connection in connections: + desc += " * {}\n".format(connection) + + # 5. graph (bound) inputs. + inputs = self._inputs.serialize() + desc += " * Graph Inputs ({}):\n".format(len(inputs)) + # if len(inputs) == 0: + # desc += " -\n" + for input in inputs: + desc += " * {}\n".format(input) + + # 6. graph (bound) outputs. + outputs = self._outputs.serialize() + desc += " * Graph Outputs ({}, {}):\n".format(len(outputs["mappings"]), outputs["type"]) + # if len(outputs) == 0: + # desc += " -\n" + for output in outputs["mappings"]: + desc += " * {}\n".format(output) + # Line "decorator". + desc += 113 * '=' + + # Return the result. + return desc + + def freeze(self, module_names: Optional[List[str]] = None): + """ + A method that freezes the weights of the trainable modules in a graph. + + Args: + module_names: List of modules to be frozen (Optional). If passed, all modules will be unfrozen. + Raises: + KeyError: If name of the module won't be recognized. + """ + # Work on all modules. + if module_names is None: + module_names = self._modules.keys() + + # Iterate through modules one by one. + for name in module_names: + if name not in self._modules.keys(): + raise KeyError("Module `{}` not present in the `{}` graph".format(name, self.name)) + # Check module type. + module = self._modules[name] + if module.type == ModuleType.trainable: + # Freeze weights of the module. + module.freeze() + else: + logging.debug("Module `{}` is not trainable so cannot be frozen".format(name)) + + def unfreeze(self, module_names: Optional[List[str]] = None): + """ + Unfreezes weights of the trainable modules in a graph. + + Args: + module_names: List of modules to be unfrozen (Optional). If not passed, all modules will be unfrozen. + Raises: + KeyError: If name of the module won't be recognized. + """ + # Work on all modules. + if module_names is None: + module_names = self._modules.keys() + + # Iterate through modules one by one. 
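# --- Editor's note: illustrative sketch, not part of this diff ---------------
# Typical use of the freeze()/unfreeze() methods above together with the graph
# checkpointing methods (save_to()/restore_from()) defined below. The graph `g`
# and the module name are hypothetical.
#
#     g.freeze()                                   # freeze all trainable modules
#     g.unfreeze(module_names=["jasper_decoder"])  # then selectively unfreeze one
#     g.save_to("graph_weights.chkpt")             # save the trainable modules' state dicts
#     g.restore_from("graph_weights.chkpt")        # load them back later
# ------------------------------------------------------------------------------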
+ for name in module_names: + if name not in self._modules.keys(): + raise KeyError("Module `{}` not present in the `{}` graph".format(name, self.name)) + # Check module type. + module = self._modules[name] + if module.type == ModuleType.trainable: + # Unfreeze weights of the module. + module.unfreeze() + else: + logging.debug("Module `{}` is not trainable so cannot be unfrozen".format(name)) + + def save_to(self, filename: str, module_names: Optional[List[str]] = None): + """ + Saves the state of trainable modules in the graph to a checkpoint file. + + Args: + filename (string): Name of the file where the checkpoint will be saved. + module_names: List of modules to be frozen (Optional). If passed, all modules will be saved. + Raises: + KeyError: If name of the module won't be recognized. + """ + # Work on all modules. + if module_names is None: + module_names = self._modules.keys() + + # Prepare the "graph checkpoint". + chkpt = {"header": {"nemo_core_version": nemo_version, "name": self.name}, "modules": {}} + + log_str = '' + # Iterate through the modules one by one. + for name in module_names: + if name not in self._modules.keys(): + raise KeyError("Module `{}` not present in the `{}` graph".format(name, self.name)) + # Check module type. + module = self._modules[name] + if module.type == ModuleType.trainable: + # Get module state_dict(). + chkpt["modules"][name] = get_state_dict(module) + log_str += " * Module '{}' ({}) params saved \n".format(module.name, type(module).__name__) + else: + logging.debug("Module `{}` is not trainable so cannot be saved".format(name)) + + # Save checkpoint. + save(chkpt, filename) + log_str = "Saved the '{}' graph to a checkpoint `{}`:\n".format(self.name, filename) + log_str + logging.info(log_str) + + def restore_from(self, filename: str, module_names: Optional[List[str]] = None): + """ + Restores the state of trainable modules in the graph from a checkpoint file. + + Args: + filename (string): Name of the checkpoint to be restored from. + module_names: List of modules to be frozen (Optional). If passed, all modules will be restored. + Raises: + KeyError: If name of the module won't be recognized. + """ + # Work on all modules. + if module_names is None: + module_names = self._modules.keys() + + # Load the checkpoint. + chkpt = load(filename) + + log_str = "Loading modules constituting the '{}' graph from the `{}` checkpoint :\n".format( + chkpt["header"]["name"], filename + ) + + warning = False + # Iterate through the modules one by one. + for name in module_names: + try: + # Get module. + module = self._modules[name] + if module.type == ModuleType.trainable: + # Restore module weights + set_state_dict(module, chkpt["modules"][name]) + log_str += " * Module '{}' ({}) params loaded\n".format(module.name, type(module).__name__) + except KeyError: + log_str += " ! Module '{}' params not found in checkpoint\n".format(name) + warning = True + + # Log results. + if warning: + logging.warning(log_str) + else: + logging.info(log_str) + + def is_complete(self) -> bool: + """ + Method checks if graph is "complete". In here the "complete" means that the graph has: + * exactly one DataLayer + * zero bound input ports + + In short it means that the graph can be complete. + + Returns: + True or false. + """ + has_datalayer = False + # Iterate through the modules one by one. + for module in self._modules.values(): + # Get module. + if module.type == ModuleType.datalayer: + if has_datalayer: + # More than one DL is not acceptable. 
+ return False + else: + has_datalayer = True + + # Now check the ports. + if len(self._inputs) != 0: + return False + + # Else: + return True diff --git a/nemo/core/neural_interface.py b/nemo/core/neural_interface.py new file mode 100644 index 000000000000..9a374da9ef88 --- /dev/null +++ b/nemo/core/neural_interface.py @@ -0,0 +1,74 @@ +# -*- coding: utf-8 -*- + +# ============================================================================= +# Copyright (c) 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +from abc import ABC, abstractmethod +from typing import Dict + +import nemo +from nemo.core.neural_types import NeuralType + + +class NeuralInterface(ABC): + """ + Abstract class offering interface shared between Neural Module and Neural Graph. + Had to move it to a separate class to: + a) avoid circular imports between Neural Module and Graph. + b) avoid collection of init_params implemented by default in Neural Module. + c) extract only the methods that are shared (NMs have plenty of methods that are not making any sense for + graph, e.g. get_weights, tie_weights, ) + """ + + def __init__(self): + """ + Constructor. Creates a "shortcut" to the application state. + """ + # Create access to the app state. + self._app_state = nemo.utils.app_state.AppState() + + @property + @abstractmethod + def input_ports(self) -> Dict[str, NeuralType]: + """ Returns definitions of module input ports + + Returns: + A (dict) of module's input ports names to NeuralTypes mapping + """ + + @property + @abstractmethod + def output_ports(self) -> Dict[str, NeuralType]: + """ Returns definitions of module output ports + + Returns: + A (dict) of module's output ports names to NeuralTypes mapping + """ + + @abstractmethod + def __call__(self, **kwargs): + """ + This method is used during the construction of a graph for neural type compatibility checking. + Actual implementation lies in Neural Module and Neural Graph classes. + + Returns: + NmTensor object or tuple of NmTensor objects + """ + + @property + def name(self): + """ Returns the object name. """ + return self._name diff --git a/nemo/core/neural_modules.py b/nemo/core/neural_modules.py index 25e42c7824fa..168825e05418 100644 --- a/nemo/core/neural_modules.py +++ b/nemo/core/neural_modules.py @@ -1,7 +1,4 @@ -# ! /usr/bin/python -# -*- coding: utf-8 -*- - -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2019-, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -15,28 +12,37 @@ # See the License for the specific language governing permissions and # limitations under the License. 
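A possible usage sketch of the graph-level API above (freeze/unfreeze, summary, save_to/restore_from, is_complete). Here `g` is assumed to be an already constructed NeuralGraph and "encoder" the registered name of one of its trainable modules; both are hypothetical placeholders, and only the method signatures introduced in this diff are used.

# Hedged usage sketch; "g" and "encoder" are assumptions, not code from this change.
g.freeze()                            # freeze every trainable module in the graph
g.unfreeze(module_names=["encoder"])  # selectively unfreeze one of them
print(g.summary())                    # modules, steps, connections, bound inputs/outputs

g.save_to("graph.chkpt")              # checkpoint the weights of trainable modules only
g.restore_from("graph.chkpt")         # modules missing from the checkpoint trigger a warning
print(g.is_complete())                # True iff exactly one DataLayer and no unbound inputs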
-"""This file contains NeuralModule and NmTensor classes.""" -__all__ = ['WeightShareTransform', 'NeuralModule'] +__all__ = ['WeightShareTransform', 'NeuralModule', 'PretrainedModelInfo', 'ModuleType', 'OperationMode'] -import collections import uuid -from abc import ABC, abstractmethod +from abc import abstractmethod from collections import namedtuple from enum import Enum from inspect import getargvalues, getfullargspec, stack -from typing import Dict, List, Optional, Set, Tuple - -import nemo -from .neural_types import ( - CanNotInferResultNeuralType, - NeuralPortNameMismatchError, - NeuralPortNmTensorMismatchError, - NeuralType, - NeuralTypeComparisonResult, - NmTensor, -) -from nemo.core import NeuralModuleFactory +from os import path +from typing import Any, Dict, List, Optional, Set, Tuple + +from ruamel.yaml import YAML + +from nemo.core.neural_factory import NeuralModuleFactory, OperationMode +from nemo.core.neural_interface import NeuralInterface +from nemo.core.neural_types import NeuralPortNameMismatchError, NeuralType, NmTensor +from nemo.package_info import __version__ as nemo_version +from nemo.utils import logging from nemo.utils.decorators.deprecated import deprecated +from nemo.utils.neural_graph.connection import StepModulePort + +YAML = YAML(typ='safe') + + +class ModuleType(Enum): + """ Back-end independent module types """ + + module = 0 + datalayer = 1 + trainable = 2 + loss = 3 + nontrainable = 4 class WeightShareTransform(Enum): @@ -51,11 +57,29 @@ class WeightShareTransform(Enum): ) -class NeuralModule(ABC): - """Abstract class that every Neural Module must inherit from. +class NeuralModule(NeuralInterface): + """ + Abstract class that every Neural Module must inherit from. """ - def __init__(self): + def __init__(self, name=None): + # Initialize the inferface. + super().__init__() + + # Retrieve dictionary of parameters (keys, values) passed to init. + self._init_params = self.__extract_init_params() + + # Get object UUID. + self._uuid = str(uuid.uuid4()) + + # Register module and store the generated name. + self._name = self._app_state.register_module(self, name) + + # Set "module" type as default. + self._type = ModuleType.module + + # Set "both" as default operation mode. + self._operation_mode = OperationMode.both # Get default factory. self._factory = NeuralModuleFactory.get_default_factory() @@ -67,79 +91,79 @@ def __init__(self): # Optimization level. self._opt_level = self._factory.optim_level - # Get object UUID. - self._uuid = str(uuid.uuid4()) - - # Retrieve dictionary of parameters (keys, values) passed to init. - self._init_params = self.__extract_init_params() - - # Pint the types of the values. - # for key, value in self._init_params.items(): - # print("{}: {} ({})".format(key, value, type(value))) - - # Validate the parameters. - # self._validate_params(self._init_params) - @property - def init_params(self) -> Optional[Dict]: + def init_params(self) -> Dict[str, Any]: """ - Property returning parameters used to instantiate the module. + Property returning parameters used to instantiate the module. - Returns: - Dictionary containing parameters used to instantiate the module. + Returns: + Dictionary containing parameters used to instantiate the module. """ return self._init_params - def __extract_init_params(self): + def __extract_init_params(self) -> Dict[str, Any]: """ - Retrieves the dictionary of of parameters (keys, values) passed to constructor of a class derived - (also indirectly) from the Neural Module class. 
+ Retrieves the dictionary of of parameters (keys, values) passed to constructor of a class derived + (also indirectly) from the Neural Module class. - Returns: - Dictionary containing parameters passed to init(). + Returns: + Dictionary containing parameters passed to init(). """ # Get names of arguments of the original module init method. - init_keys = getfullargspec(type(self).__init__).args - - # Remove self. - if "self" in init_keys: - init_keys.remove("self") + to_set_params = getfullargspec(type(self).__init__).args + to_set_params.remove("self") - # Create list of params. - init_params = {}.fromkeys(init_keys) + # Create empty list of init params. + init_params = {} - # Retrieve values of those params from the call list. + # Get the frame "call context". for frame in stack()[1:]: - localvars = getargvalues(frame[0]).locals - # print("localvars: ", localvars) - for key in init_keys: - # Found the variable! - if key in localvars.keys(): - # Save the value. - init_params[key] = localvars[key] + # Get the current call arguments. + localvars = getargvalues(frame[0]) + + # Fill the parameters with call arguments. + for key in to_set_params: + if key in localvars.args: + init_params[key] = localvars.locals[key] + + # Remove all set keys. + for key in init_params.keys(): + if key in to_set_params: + to_set_params.remove(key) + + # Check if we have set everything. + if len(to_set_params) == 0: + break + + # Make sure that we collected ALL (and ONLY) the signature params - if not, then there is a BUG! + if len(to_set_params) != 0: + raise ValueError( + "Could not collect all the signature params! " + f"Please file a bug on GitHub with the current stack trace so that it can be reproduced." + ) + + # print("! init_params of {}: {}\n".format(type(self).__name__, init_params)) # Return parameters. return init_params - # TODO: IF part of API, should not start with _, it hidden should start with __ - def _validate_params(self, params): + def __validate_params(self, params: Dict[str, Any]) -> bool: """ - Checks whether dictionary contains parameters being primitive types (string, int, float etc.) - or (lists of)+ primitive types. + Checks whether dictionary contains parameters being primitive types (string, int, float etc.) + or (lists of)+ primitive types. - Args: - params: dictionary of parameters. - - Returns: - True if all parameters were ok, False otherwise. + Args: + params: dictionary of parameters. + Returns: + True if all parameters were ok, False otherwise. """ ok = True # Iterate over parameters and check them one by one. for key, variable in params.items(): if not self.__is_of_allowed_type(variable): - nemo.logging.warning( - "{} contains variable {} is of type {} which is not of a allowed.".format( + logging.warning( + "Parameter '{}' contains a variable '{}' of type '{}' which is not allowed.".format( key, variable, type(variable) ) ) @@ -148,16 +172,19 @@ def _validate_params(self, params): # Return the result. return ok - def __is_of_allowed_type(self, var): + def __is_of_allowed_type(self, var) -> bool: """ - A recursive function that checks if a given variable is allowed (in) - - Args: - pretrained_model_name (str): name of pretrained model to use in order. + A recursive function that checks if a given variable is of allowed type. - Returns: - True if all parameters were ok, False otherwise. + Args: + pretrained_model_name (str): name of pretrained model to use in order. + Returns: + True if all parameters were ok, False otherwise. """ + # Special case: None is also allowed. 
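The frame-inspection trick used by __extract_init_params above is easier to see in a minimal, standalone sketch. This is not NeMo code, only the same idea (read the signature of __init__, then pick matching values off the caller's frames) applied to a toy class.

# Minimal standalone sketch of the init-parameter harvesting performed above.
from inspect import getargvalues, getfullargspec, stack


class Base:
    def __init__(self):
        # Names of arguments declared by the most-derived __init__.
        to_set = getfullargspec(type(self).__init__).args
        to_set.remove("self")
        init_params = {}
        # Walk up the call stack and collect the values bound to those names.
        for frame in stack()[1:]:
            localvars = getargvalues(frame[0])
            for key in list(to_set):
                if key in localvars.args:
                    init_params[key] = localvars.locals[key]
                    to_set.remove(key)
            if not to_set:
                break
        self._init_params = init_params


class Toy(Base):
    def __init__(self, dim=10, name=None):
        super().__init__()


print(Toy(dim=32)._init_params)  # {'dim': 32, 'name': None}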
+ if var is None: + return True + var_type = type(var) # If this is list - check its elements. @@ -166,7 +193,7 @@ def __is_of_allowed_type(self, var): if not self.__is_of_allowed_type(list_var): return False - # If this is list - check its elements. + # If this is dict - check its elements. elif var_type == dict: for _, dict_var in var.items(): if not self.__is_of_allowed_type(dict_var): @@ -178,32 +205,340 @@ def __is_of_allowed_type(self, var): # Well, seems that everything is ok. return True - @deprecated(version=0.11) - @staticmethod - def create_ports(**kwargs): - """ Deprecated method, to be remoted in the next release.""" - raise Exception( - 'Deprecated method. Please implement ``inputs`` and ``outputs`` \ - properties to define module ports instead' + def export_to_config(self, config_file: str): + """ + A function that exports module "configuration" (i.e. init parameters) to a YAML file. + + Args: + config_file: path (absolute or relative) and name of the config file (YML) + Raises: + ValueError: An error occurred and parameters coudn't be exported. + """ + # Greate an absolute path. + abs_path_file = path.expanduser(config_file) + + # Serialize the module. + to_export = self.serialize() + + # All parameters are ok, let's export. + with open(abs_path_file, 'w') as outfile: + YAML.dump(to_export, outfile) + + logging.info( + "Configuration of module `{}` ({}) exported to '{}'".format(self.name, type(self).__name__, abs_path_file) + ) + + def serialize(self) -> Dict[str, Any]: + """ + A method serializing the whole Neural module (into a dictionary). + + Returns: + Dictionary containing a "serialized" module. + """ + # Create a dictionary representing the serialized object. + serialized_module = {} + + # Add "header" with module "specification". + serialized_module["header"] = self.__serialize_header() + + # Add init parameters. + serialized_module["init_params"] = self._serialize_configuration() + + # Return the dictionary. + return serialized_module + + def __serialize_header(self) -> Dict[str, Any]: + """ + A protected method that creates a header stored later in the configuration file. + + Returns: + Dictionary containing a header with module specification. + """ + + # Get module "full specification". + module_full_spec = str(self.__module__) + "." + str(self.__class__.__qualname__) + module_class_name = type(self).__name__ + # print(module_full_spec) + + # Check whether module belongs to a collection. + spec_list = module_full_spec.split(".") + + # Do not check Neural Modules from unit tests. + if spec_list[0] == "tests": + # Set collection variables. + collection_type = "tests" + collection_version = None + else: + # Check if component belongs to any collection + if len(spec_list) < 3 or (spec_list[0] != "nemo" and spec_list[1] != "collection"): + logging.warning( + "Module `{}` does not belong to any collection. This won't be allowed in the next release.".format( + module_class_name + ) + ) + collection_type = "unknown" + collection_version = None + else: + # Ok, set collection. + collection_type = spec_list[2] + collection_version = None + # TODO: to be SET! + # print(getattr("nemo.collections.nlp", __version__)) + + # Create a "header" with module "specification". + header = { + "nemo_core_version": nemo_version, + "collection_type": collection_type, + "collection_version": collection_version, + # "class": module_class_name, # Operating only on full_spec now. 
+ "full_spec": module_full_spec, + } + return header + + def _serialize_configuration(self) -> Dict[str, Any]: + """ + A function that serializes the module "configuration (i.e. init parameters) to a dictionary. + + ..note: + Thus functions should be overloaded when writing a custom module import/export. + + Returns: + A "serialized" dictionary with module configuration. + Raises: + A ValueError exception in case then parameters coudn't be exported. + """ + # Check if generic export will work. + if not self.__validate_params(self._init_params): + raise ValueError( + "Generic configuration export enables to use of parameters of primitive types (string, int, float) " + F"or (lists of/dicts of) primitive types. Please implement your own custom `export_to_config()` and " + F"`import_from_config()` methods for your custom Module class." + ) + # In this case configuration = init parameters. + return self._init_params + + @classmethod + def import_from_config( + cls, config_file: str, section_name: str = None, name: str = None, overwrite_params: Dict = {} + ) -> 'NeuralModule': + """ + Class method importing the configuration file. + Raises an ImportError exception when config file is invalid or + incompatible (when called from a particular class). + + Args: + config_file: path (absolute or relative) and name of the config file (YML) + section_name: section in the configuration file storing module configuration (optional, DEFAULT: None) + name: name of the module that will overwrite the name in the `init_params` (optional, DEFAULT: None) + overwrite_params: Dictionary containing parameters that will be added to or overwrite (!) + the default init parameters loaded from the configuration file (the module "init_params" section). + Returns: + Instance of the created NeuralModule object. + """ + logging.info("Loading configuration of a new Neural Module from the `{}` file".format(config_file)) + + # Validate the content of the configuration file (its header). + loaded_config = cls.__validate_config_file(config_file, section_name) + + # "Deserialize" the module. + obj = cls.deserialize(loaded_config, name, overwrite_params) + + # Return the new module. + return obj + + @classmethod + def __validate_config_file(cls, config_file: str, section_name: str = None) -> Dict[str, Any]: + """ + Class method validating whether the config file has a proper content (sections, specification etc.). + Raises an ImportError exception when config file is invalid or + incompatible (when called from a particular class). + + Args: + config_file: path (absolute or relative) and name of the config file (YML) + section_name: section in the configuration file storing module configuration (optional, DEFAULT: None) + Returns: + A loaded configuration file (dictionary). + """ + # Greate an absolute path. + abs_path_file = path.expanduser(config_file) + + # Open the config file. + with open(abs_path_file, 'r') as stream: + loaded_config = YAML.load(stream) + + # Check section. + if section_name is not None: + if section_name not in loaded_config: + raise ImportError( + "The loaded config `{}` doesn't contain the indicated `{}` section".format( + config_file, section_name + ) + ) + # Section exists - use only it for configuration. + loaded_config = loaded_config[section_name] + + # Make sure that the config is valid. 
+ if "header" not in loaded_config: + raise ImportError("The loaded config `{}` doesn't contain the `header` section".format(config_file)) + + if "init_params" not in loaded_config: + raise ImportError("The loaded config `{}` doesn't contain the `init_params` section".format(config_file)) + + # Parse the "full specification". + spec_list = loaded_config["header"]["full_spec"].split(".") + + # Check if config contains data of a compatible class. + if not issubclass(cls.__deserialize_header(loaded_config["header"]), cls): + txt = "The loaded file `{}` contains configuration of ".format(config_file) + txt = txt + "`{}` thus cannot be used for instantiation of an object of type `{}`".format( + spec_list[-1], cls.__name__ + ) + raise ImportError(txt) + + # Success - return configuration. + return loaded_config + + @classmethod + def deserialize( + cls, configuration: Dict[str, Any], name: str = None, overwrite_params: Dict[str, Any] = {} + ) -> 'NeuralModule': + """ + Class method instantiating the neural module object based on the configuration (dictionary). + + Args: + configuration: Dictionary containing proper "header" and "init_params" sections. + + name: name of the module that will overwrite the name in the `init_params` (optional, DEFAULT: None) + + overwrite_params: Dictionary containing parameters that will be added to or overwrite (!) + the default init parameters loaded from the configuration file (the module "init_params" section). + + Returns: + Instance of the created NeuralModule object. + """ + # Deserialize header - get object class. + module_class = cls.__deserialize_header(configuration["header"]) + + # Update parameters with additional ones. + configuration["init_params"].update(overwrite_params) + + # Override module name in init_params using the logic: + # * section_name if not none overrides init_params.name first (skipped for now, TOTHINK!) + # * name (if None) overrides init_params.name + if name is not None: + configuration["init_params"]["name"] = name + + # Get init parameters. + init_params = cls._deserialize_configuration(configuration["init_params"]) + + # Create the module instance. + new_module = module_class(**init_params) + logging.info( + "Instantiated a new Neural Module named `{}` of type `{}`".format( + new_module.name, type(new_module).__name__ + ) ) + # Return the module instance. + return new_module + + @classmethod + def __deserialize_header(cls, serialized_header: Dict[str, Any]): + """ + Method deserializes the header and extracts the module class. + + Args: + serialized_header: Dictionary containing module header. + Returns: + Class of the module to be created. + """ + # Parse the "full specification". + spec_list = serialized_header["full_spec"].split(".") + + # Get module class from the "full specification". + mod_obj = __import__(spec_list[0]) + for spec in spec_list[1:]: + mod_obj = getattr(mod_obj, spec) + + # Return "class". + return mod_obj + + @classmethod + def _deserialize_configuration(cls, serialized_init_params: Dict[str, Any]): + """ + A function that deserializes the module "configuration (i.e. init parameters). + + ..note: + Thus functions should be overloaded when writing a custom module import/export. + + Args: + serialized_init_params: List of init parameters loaded from the file. + Returns: + A "deserialized" list with init parameters. + """ + # In this case configuration = init parameters. 
+ return serialized_init_params + @property @abstractmethod - def input_ports(self) -> Optional[Dict[str, NeuralType]]: - """Returns definitions of module input ports + def input_ports(self) -> Dict[str, NeuralType]: + """ + Returns definitions of module input ports Returns: - A (dict) of module's input ports names to NeuralTypes mapping + A dictionary containing module's input ports (names, NeuralTypes) mapping. """ @property @abstractmethod - def output_ports(self) -> Optional[Dict[str, NeuralType]]: - """Returns definitions of module output ports + def output_ports(self) -> Dict[str, NeuralType]: + """ + Returns definitions of module output ports + + Returns: + A dictionary containing module's output ports (names, NeuralTypes) mapping. + """ + + @property + def _disabled_deployment_input_ports(self) -> Set[str]: + """Returns names of input ports that will not be included in an export + + Returns: + A (set) of module's input port names that are not exportable + """ + return set([]) + + @property + def _disabled_deployment_output_ports(self) -> Set[str]: + """Returns names of output ports that will not be included in an export + + Returns: + A (set) of module's output port names that are not exportable + """ + return set([]) + + def _prepare_for_deployment(self) -> None: + """Patch the module if required to prepare for deployment Returns: - A (dict) of module's output ports names to NeuralTypes mapping + (Optional) input and output example tensors """ + return None, None + + @property + def operation_mode(self): + """ Returns the operation mode. """ + return self._operation_mode + + @property + def type(self): + """ Returns the type of module. """ + return self._type + + @operation_mode.setter + def operation_mode(self, operation_mode: OperationMode): + """ Sets the operation mode. """ + self._operation_mode = operation_mode @staticmethod def pretrained_storage(): @@ -227,89 +562,113 @@ def __call__(self, **kwargs): Returns: NmTensor object or tuple of NmTensor objects """ - # Get input and output ports definitions. - input_port_defs = self.input_ports - output_port_defs = self.output_ports - - first_input_nmtensor_type = None - input_nmtensors_are_of_same_type = True - for port_name, tgv in kwargs.items(): - # make sure that passed arguments correspond to input port names - if port_name not in input_port_defs.keys(): - raise NeuralPortNameMismatchError("Wrong input port name: {0}".format(port_name)) - - input_port = input_port_defs[port_name] - type_comatibility = input_port.compare(tgv) - if ( - type_comatibility != NeuralTypeComparisonResult.SAME - and type_comatibility != NeuralTypeComparisonResult.GREATER - ): - raise NeuralPortNmTensorMismatchError( - "\n\nIn {0}. \n" - "Port: {1} and a NmTensor it was fed are \n" - "of incompatible neural types:\n\n{2} \n\n and \n\n{3}" - "\n\nType comparison result: {4}".format( - self.__class__.__name__, port_name, input_port_defs[port_name], tgv, type_comatibility, - ) + # print(" Neural Module:__call__") + + # Set the operation mode of the outer graph. + self.operation_mode = self._app_state.active_graph.operation_mode + # The input and output ports definitions can potentially depend on the operation mode! + + # Record the operation (i.e. add a single module). + step_number = self._app_state.active_graph.record_step(self) + + ###### PROCESS INPUTS. ###### + # Iterate through all passed parameters. + for port_name, port_content in kwargs.items(): + # Make sure that passed arguments corresponds to one of the input port names. 
+ if port_name not in self.input_ports.keys():
+ raise NeuralPortNameMismatchError(port_name)
+
+ # At that point the input can be one of three types:
+ # * NeuralGraph -> bind port using the default name and type.
+ # * GraphInput -> check definition, if ok bind port.
+ # * NmTensor -> check definition, add self as a "consumer" of a tensor (produced by another module).
+
+ # Check what was actually passed.
+ if type(port_content).__name__ == "NeuralGraph":
+ # Make sure that port_content is the currently active graph!
+ if port_content is not self._app_state.active_graph:
+ raise ConnectionError("Ports can be bound only by passing the active graph object!")
+ # Create an alias so the logic will be clearer.
+ active_graph = port_content
+
+ # This case: we are nesting one graph into another and must bind the input port of one graph inside the other!
+ # So generally we must "copy" the input port definition of this module to the graph (the inverted logic!).
+
+ # Copy the port "definition" (i.e. its NeuralType) using the same port name.
+ active_graph.inputs[port_name] = self.input_ports[port_name]
+
+ # Bind the neural graph input port, i.e. remember that a given graph port should pass data
+ # to THIS module-port (when it finally will be connected).
+ active_graph.inputs[port_name].bind(StepModulePort(step_number, self.name, port_name))
+
+ # Please note that there are no "consumers" here - this is a "pure binding".
+
+ elif type(port_content).__name__ == "GraphInput":
+
+ # Check if the GraphInput belongs to the active graph!
+ own_port = False
+ for gcontent in self._app_state.active_graph.inputs.values():
+ if gcontent is port_content:
+ own_port = True
+ break
+ if not own_port:
+ raise NeuralPortNameMismatchError(port_name)
+
+ # Compare input port definition with the received definition.
+ self.input_ports[port_name].compare_and_raise_error(
+ self.__class__.__name__, port_name, port_content.ntype
+ )
- # if first_input_nmtensor_type is None:
- # first_input_nmtensor_type = NeuralType(tgv._axis2type)
- # else:
- # if first_input_nmtensor_type._axis2type is None:
- # input_nmtensors_are_of_same_type = True
- # else:
- # input_nmtensors_are_of_same_type = first_input_nmtensor_type.compare(
- # tgv
- # ) == NeuralTypeComparisonResult.SAME and len(first_input_nmtensor_type._axis2type)
- # if not (
- # type_comatibility == NeuralTypeComparisonResult.SAME
- # or type_comatibility == NeuralTypeComparisonResult.GREATER
- # ):
- # raise NeuralPortNmTensorMismatchError(
- # "\n\nIn {0}. \n"
- # "Port: {1} and a NmTensor it was fed are \n"
- # "of incompatible neural types:\n\n{2} \n\n and \n\n{3}"
- # "\n\nType comparison result: {4}".format(
- # self.__class__.__name__, port_name, input_port_defs[port_name], tgv, type_comatibility,
- # )
- # )
- # if type_comatibility == NeuralTypeComparisonResult.LESS:
- # print('Types were raised')
+ # Bind the neural graph input port, i.e. remember that a given graph port should pass data
+ # to THIS module-port (when it finally will be connected).
+ port_content.bind(StepModulePort(step_number, self.name, port_name))
+
+ # Please note that there are no "consumers" here - this is a "pure binding".
+
+ elif type(port_content) is NmTensor:
+ # Compare input port definition with the received definition.
+ self.input_ports[port_name].compare_and_raise_error(self.__class__.__name__, port_name, port_content)
+ # Ok, the goal here is to actually "connect": add self (module) as a "consumer" of the input tensor.
+ port_content.add_consumer(StepModulePort(step_number, self.name, port_name)) + else: + raise TypeError( + "Input '{}' must be of one of three types: NeuralGraph, GraphInput or NmTensor".format(port_name) + ) + + ###### PRODUCE OUTPUTS. ###### + output_port_defs = self.output_ports + # Create output tensors. if len(output_port_defs) == 1: + # Get port name and type. out_name = list(output_port_defs)[0] out_type = output_port_defs[out_name] - if out_type is None: - if input_nmtensors_are_of_same_type: - out_type = first_input_nmtensor_type - else: - raise CanNotInferResultNeuralType( - "Can't infer output neural type." "Likely your inputs are of " "different type." - ) - return NmTensor(producer=self, producer_args=kwargs, name=out_name, ntype=out_type,) + + # Create a single returned tensor. + results = NmTensor(producer=self, producer_args=kwargs, output_port_name=out_name, ntype=out_type,) + + # Bind the "default" output ports. + self._app_state.active_graph.bind_outputs(results) else: - result = [] - for out_port, n_type in output_port_defs.items(): - out_type = n_type - if out_type is None: - if input_nmtensors_are_of_same_type: - out_type = first_input_nmtensor_type - else: - raise CanNotInferResultNeuralType( - "Can't infer output neural type." "Likely your inputs are of " "different type." - ) - result.append(NmTensor(producer=self, producer_args=kwargs, name=out_port, ntype=out_type,)) - - # Creating ad-hoc class for returning from module's forward pass. + # Create output tensors. + output_tensors = [] + for out_name, out_type in output_port_defs.items(): + output_tensors.append( + NmTensor(producer=self, producer_args=kwargs, output_port_name=out_name, ntype=out_type,) + ) + + # Create a named tuple type enabling to access outputs by attributes (e.g. out.x). output_class_name = f'{self.__class__.__name__}Output' - field_names = list(output_port_defs) - result_type = collections.namedtuple(typename=output_class_name, field_names=field_names,) + result_type = namedtuple(typename=output_class_name, field_names=output_port_defs.keys()) + + # Create the returned tuple object. + results = result_type(*output_tensors) - # Tie tuple of output tensors with corresponding names. - result = result_type(*result) + # Bind the output tensors. + self._app_state.active_graph.bind_outputs(output_tensors) - return result + # Return the results. 
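The three kinds of port content handled by __call__ above are easiest to see in a graph-building sketch. The modules below (encoder, encoder_2, decoder) are hypothetical, already-instantiated modules with an "x" input port, and NeuralGraph/OperationMode are assumed to be importable from nemo.core as in the Neural Graphs examples.

# Hedged sketch of calling modules inside an active graph.
from nemo.core import NeuralGraph, OperationMode

with NeuralGraph(operation_mode=OperationMode.training) as g:
    # 1. NeuralGraph: passing the active graph binds a new graph input port named "x".
    embeddings = encoder(x=g)
    # 2. GraphInput: passing an already-bound graph input reuses that port.
    embeddings_2 = encoder_2(x=g.inputs["x"])
    # 3. NmTensor: passing a tensor connects the producer module to this module directly.
    predictions = decoder(x=embeddings)
    # A module with several output ports would instead return a named tuple whose
    # fields are the output port names.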
+ return results def __str__(self): return self.__class__.__name__ diff --git a/nemo/core/neural_types/axes.py b/nemo/core/neural_types/axes.py index 1b3159815a90..073b215e1a4d 100644 --- a/nemo/core/neural_types/axes.py +++ b/nemo/core/neural_types/axes.py @@ -45,6 +45,9 @@ class AxisKind(AxisKindAbstract): Height = 4 Any = 5 + def __repr__(self): + return self.__str__() + def __str__(self): return str(self.name).lower() @@ -83,3 +86,12 @@ def __init__(self, kind: AxisKindAbstract, size: Optional[int] = None, is_list=F self.kind = kind self.size = size self.is_list = is_list + + def __repr__(self): + if self.size is None: + representation = str(self.kind) + else: + representation = f"{str(self.kind)}:{self.size}" + if self.is_list: + representation += "_listdim" + return representation diff --git a/nemo/core/neural_types/elements.py b/nemo/core/neural_types/elements.py index 5d410b90ebde..945506065a34 100644 --- a/nemo/core/neural_types/elements.py +++ b/nemo/core/neural_types/elements.py @@ -34,6 +34,7 @@ 'LengthsType', 'EmbeddedTextType', 'EncodedRepresentation', + 'MaskType', ] import abc from abc import ABC, abstractmethod @@ -49,6 +50,9 @@ class ElementType(ABC): def __str__(self): self.__doc__ + def __repr__(self): + return self.__class__.__name__ + @property def type_parameters(self) -> Dict: """Override this property to parametrize your type. For example, you can specify 'storage' type such as @@ -185,3 +189,7 @@ class RegressionValuesType(PredictionsType): class CategoricalValuesType(PredictionsType): """Element type to represent labels for categorical classification task""" + + +class MaskType(PredictionsType): + """Element type to represent boolean mask""" diff --git a/nemo/core/neural_types/neural_type.py b/nemo/core/neural_types/neural_type.py index b36d0c3eba5f..699d87c99662 100644 --- a/nemo/core/neural_types/neural_type.py +++ b/nemo/core/neural_types/neural_type.py @@ -21,14 +21,15 @@ 'NeuralTypeError', 'NeuralPortNameMismatchError', 'NeuralPortNmTensorMismatchError', - 'CanNotInferResultNeuralType', ] import uuid -from typing import Optional, Tuple +from typing import List, Optional, Tuple from nemo.core.neural_types.axes import AxisKind, AxisType from nemo.core.neural_types.comparison import NeuralTypeComparisonResult from nemo.core.neural_types.elements import * +from nemo.utils.app_state import AppState +from nemo.utils.neural_graph.connection import Connection, StepModulePort class NeuralType(object): @@ -46,10 +47,11 @@ class NeuralType(object): """ def __str__(self): - return ( - f"axes: {[(c.kind, c.size, c.is_list) for c in self.axes]}\n" - f"elements_type: {self.elements_type.__class__.__name__}" - ) + + if self.axes is not None: + return f"axes: {self.axes}; elements_type: {self.elements_type.__class__.__name__}" + else: + return f"axes: None; elements_type: {self.elements_type.__class__.__name__}" def __init__(self, axes: Optional[Tuple] = None, elements_type: ElementType = VoidType(), optional=False): if not isinstance(elements_type, ElementType): @@ -111,6 +113,15 @@ def compare(self, second) -> NeuralTypeComparisonResult: else: return NeuralTypeComparisonResult.INCOMPATIBLE + def compare_and_raise_error(self, parent_type_name, port_name, second_object): + """ Method compares definition of one type with another and raises an error if not compatible. 
""" + type_comatibility = self.compare(second_object) + if ( + type_comatibility != NeuralTypeComparisonResult.SAME + and type_comatibility != NeuralTypeComparisonResult.GREATER + ): + raise NeuralPortNmTensorMismatchError(parent_type_name, port_name, self, second_object, type_comatibility) + @staticmethod def __check_sanity(axes): # check that list come before any tensor dimension @@ -191,7 +202,7 @@ class NmTensor(NeuralType): It also has a type of NeuralType represented by inheriting from NeuralType object.""" - def __init__(self, producer, producer_args, name, ntype=None): + def __init__(self, producer, producer_args, output_port_name, ntype=None): """NmTensor constructor. Args: @@ -200,10 +211,20 @@ def __init__(self, producer, producer_args, name, ntype=None): of arguments which were sent to producer to create this """ super(NmTensor, self).__init__(axes=ntype.axes, elements_type=ntype.elements_type, optional=ntype.optional) - self._producer = producer + # producer is None: a special case present in some of the unit tests. + if producer is None: + self._producer_name = "None" + else: + self._producer_name = producer.name self._producer_args = producer_args - self._name = name + self._output_port_name = output_port_name + self._name = output_port_name self._uuid = str(uuid.uuid4()) + # Remember step at which this tensor was created. + self._step_number = AppState().active_graph.step_number + # List of tuples (step number, module name, input port name) + self._consumers = [] + AppState().tensor_names.register(self) @property def producer(self): @@ -211,7 +232,67 @@ def producer(self): Returns: NeuralModule object which produced this NmTensor. """ - return self._producer + return AppState().modules[self._producer_name] + + @property + def producer_name(self) -> str: + """ + Returns: + Name of the producer of the tensor. + """ + return self._producer_name + + @property + def producer_step_number(self) -> int: + """ + Returns: + Step number indicating when the tensor was produced. + (It also indicates who produced the tensor.) + """ + return self._step_number + + @property + def producer_step_module_port(self) -> StepModulePort: + """ + Returns: + A tuple containing step number, module name and corresponding output port name. + """ + return StepModulePort(self._step_number, self._producer_name, self._output_port_name) + + @property + def consumers(self) -> List[StepModulePort]: + """ + Returns: + A list of tuples containing consumer step number, module name and corresponding input port names. + """ + return self._consumers + + def add_consumer(self, step_module_port: StepModulePort): + """ + Adds the "consumer" to tensor. + + Args: + step_port: Step number, module name and module's input port. + """ + self._consumers.append(step_module_port) + + @property + def ntype(self): + """ + Returns: + Neural Type associated with this NmTensor. + """ + return NeuralType(axes=self.axes, elements_type=self.elements_type, optional=self.optional) + + def connections(self): + """ + "Serializes" the tensor to a list of connections (step/producer/port, step/consumer/port). 
+ + """ + connections = [] + for con_mod_port in self._consumers: + connections.append(Connection(self.producer_step_module_port, con_mod_port, self.ntype)) + return connections @property def producer_args(self): @@ -240,9 +321,22 @@ def unique_name(self): Returns: str: unique name """ - if self._producer is None: + if self._producer_name is None: raise ValueError("This NmTensor does not have a unique name") - return f"{self._name}~~~{self.producer}~~~{self._uuid}" + return f"{self._output_port_name}~~~{self._producer_name}~~~{self._uuid}" + + def rename(self, new_name: str): + """Renames the tensor from its old name to a new user-defined name for easy access within callbacks. Note, + a tensor's unique_name is never changed. This simply adds a reference from new_name -> tensor.unique_name + + args: + new_name (str): the new tensor's name. + """ + AppState().tensor_names.rename_NmTensor(self, new_name) + self._name = new_name + + def __str__(self): + return self.name class NeuralTypeError(Exception): @@ -255,20 +349,15 @@ class NeuralPortNameMismatchError(NeuralTypeError): """Exception raised when neural module is called with incorrect port names.""" - def __init__(self, message): - self.message = message + def __init__(self, input_port_name): + self.message = "Wrong input port name: {0}".format(input_port_name) class NeuralPortNmTensorMismatchError(NeuralTypeError): """Exception raised when a port is fed with a NmTensor of incompatible type.""" - def __init__(self, message): - self.message = message - - -class CanNotInferResultNeuralType(NeuralTypeError): - """Exception raised when NeuralType of output can not be inferred.""" - - def __init__(self, message): - self.message = message + def __init__(self, class_name, port_name, first_type, second_type, type_comatibility): + self.message = "\nIn {}. \nPort: {} and a NmTensor it was fed are \n".format(class_name, port_name) + self.message += "of incompatible neural types:\n\n{} \n\n and \n\n{}".format(first_type, second_type) + self.message += "\n\nType comparison result: {}".format(type_comatibility) diff --git a/nemo/package_info.py b/nemo/package_info.py index e589578de875..2dd69c304dec 100644 --- a/nemo/package_info.py +++ b/nemo/package_info.py @@ -16,9 +16,9 @@ # limitations under the License. MAJOR = 0 -MINOR = 9 +MINOR = 11 PATCH = 0 -PRE_RELEASE = '' +PRE_RELEASE = 'b2' # Use the following formatting: (major, minor, patch, pre-release) VERSION = (MAJOR, MINOR, PATCH, PRE_RELEASE) @@ -34,4 +34,4 @@ __download_url__ = 'https://github.com/NVIDIA/NeMo/releases' __description__ = 'NEMO core package. Necessary for all collections' __license__ = 'Apache2' -__keywords__ = 'deep learning, machine learning, gpu, NLP, NeMo, nvidia, ' 'pytorch, torch, tts, speech, language ' +__keywords__ = 'deep learning, machine learning, gpu, NLP, NeMo, nvidia, pytorch, torch, tts, speech, language' diff --git a/nemo/utils/__init__.py b/nemo/utils/__init__.py index ef398e88b6b9..15872561c92a 100644 --- a/nemo/utils/__init__.py +++ b/nemo/utils/__init__.py @@ -1,7 +1,6 @@ -# ! /usr/bin/python # -*- coding: utf-8 -*- - -# Copyright 2020 NVIDIA. All Rights Reserved. +# ============================================================================= +# Copyright (c) 2020 NVIDIA. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -16,6 +15,11 @@ # limitations under the License. 
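With the producer/consumer bookkeeping above, a tensor created during graph construction can be inspected and aliased as sketched below; `loss` is a hypothetical NmTensor produced by some module call, not an object defined in this change.

# Hedged sketch of the new NmTensor bookkeeping; "loss" is a hypothetical tensor.
print(loss.producer_name)               # name of the module that produced it
print(loss.producer_step_module_port)   # StepModulePort(step, module name, output port)
print(loss.consumers)                   # StepModulePorts of modules fed with this tensor
print(loss.connections())               # one Connection per consumer, carrying the NeuralType

# Register a user-friendly alias for use in callbacks; unique_name never changes.
loss.rename("train_loss")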
# ============================================================================= +from nemo.utils.nemo_logging import Logger as _Logger +from nemo.utils.nemo_logging import LogMode as logging_mode + +logging = _Logger() + from .argparse import NemoArgParser from .exp_logging import ExpManager, get_logger from .helpers import * diff --git a/nemo/utils/app_state.py b/nemo/utils/app_state.py new file mode 100644 index 000000000000..45c134ee9995 --- /dev/null +++ b/nemo/utils/app_state.py @@ -0,0 +1,126 @@ +# -*- coding: utf-8 -*- +# ============================================================================= +# Copyright (c) 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +# Sadly have to import the whole "nemo" python module to avoid circular dependencies. +# Moreover, at that point nemo module doesn't contain "core", so during "python module registration" +# nothing from nemo.core, including e.g. types (so we cannot use them for "python 3 type hints"). +import nemo +from nemo.utils.metaclasses import Singleton +from nemo.utils.neural_graph.neural_graph_manager import NeuralGraphManager +from nemo.utils.neural_graph.object_registry import ObjectRegistry +from nemo.utils.nmtensor_registry import NmTensorNameRegistry + + +class AppState(metaclass=Singleton): + """ + Application state stores variables important from the point of view of execution of the NeMo application. + Staring from the most elementary (epoch number, episode number, device used etc.) to the currently + active graph etc. + """ + + def __init__(self, device=None): + """ + Constructor. Initializes global variables. + + Args: + device: main device used for computations [CPU | GPU] (DEFAULT: GPU) + """ + # Had to set it to None in argument to avoid circular import at the class initialization phase. + if device is None: + self._device = nemo.core.DeviceType.GPU + else: + self._device = device + # Create module registry. + self._module_registry = ObjectRegistry("module") + # Create graph manager (registry with some additional functionality). + self._neural_graph_manager = NeuralGraphManager() + # Create NmTensor registry + self._nmtensor_name_registry = NmTensorNameRegistry() + + @property + def tensor_names(self): + """ Property returning the NmTensorNameRegistry which maps user-defined names to tensor's unique_names. + + Returns: + NmTensorNameRegistry. + """ + return self._nmtensor_name_registry + + @property + def modules(self): + """ + Property returning the existing modules. + + Returns: + Existing modules (a set object). + """ + return self._module_registry + + @property + def graphs(self): + """ Property returning the existing graphs. + + Returns: + Existing graphs (a set object). + """ + return self._neural_graph_manager + + def register_module(self, module, name: str) -> str: + """ + Registers a module using the provided name. + If name is none - generates a new unique name. 
+ + Args: + module: A Neural Module object to be registered. + name: A "proposition" of module name. + + Returns: + A unique name (proposition or newly generated name). + """ + return self._module_registry.register(module, name) + + def register_graph(self, graph, name: str) -> str: + """ + Registers a new graph using the provided name. + If name is none - generates a new unique name. + + Args: + graph: A Neural Graph object to be registered. + name: A "proposition" of graph name. + + Returns: + A unique name (proposition or newly generated name). + """ + return self._neural_graph_manager.register(graph, name) + + @property + def active_graph(self): + """ Property returns the active graph. + + Returns: + Active graph. + """ + return self._neural_graph_manager.active_graph + + @active_graph.setter + def active_graph(self, graph): + """ Property sets the active graph. + + Args: + graph: Neural graph object that will become active. + """ + self._neural_graph_manager.active_graph = graph diff --git a/nemo/utils/argparse.py b/nemo/utils/argparse.py index 5bb1bf298672..be678e4c5a1a 100644 --- a/nemo/utils/argparse.py +++ b/nemo/utils/argparse.py @@ -1,5 +1,6 @@ # Copyright (c) 2019 NVIDIA Corporation import argparse +import os class NemoArgParser(argparse.ArgumentParser): @@ -17,7 +18,7 @@ def __init__(self, **kwargs): super().__init__(**kwargs) # NeMo arguments self.add_argument( - "--local_rank", default=None, type=int, help="node rank for distributed training", + "--local_rank", default=os.getenv('LOCAL_RANK', None), type=int, help="node rank for distributed training", ) self.add_argument( "--amp_opt_level", @@ -83,13 +84,13 @@ def __init__(self, **kwargs): "--num_epochs", type=int, default=None, - help="number of epochs to train. You should specify" "either num_epochs or max_steps", + help="number of epochs to train. You should specify either num_epochs or max_steps", ) self.add_argument( "--max_steps", type=int, default=None, - help="max number of steps to train. You should " "specify either num_epochs or max_steps", + help="max number of steps to train. You should specify either num_epochs or max_steps", ) self.add_argument("--lr", type=float, default=1e-3, help="base learning rate") self.add_argument( @@ -101,7 +102,7 @@ def __init__(self, **kwargs): "--iter_per_step", default=1, type=int, - help="number of gradients accumulation iterations " "per weights update step", + help="number of gradients accumulation iterations per weights update step", ) # Logging arguments diff --git a/nemo/utils/decorators/__init__.py b/nemo/utils/decorators/__init__.py index a10308813138..d94b5c94f9f7 100644 --- a/nemo/utils/decorators/__init__.py +++ b/nemo/utils/decorators/__init__.py @@ -13,3 +13,4 @@ # limitations under the License. from .deprecated import deprecated +from .port_docs import add_port_docs diff --git a/nemo/utils/decorators/deprecated.py b/nemo/utils/decorators/deprecated.py index 862d99ac7ba1..d738c8a18031 100644 --- a/nemo/utils/decorators/deprecated.py +++ b/nemo/utils/decorators/deprecated.py @@ -20,7 +20,9 @@ import wrapt -import nemo +from nemo.utils import logging + +# from nemo.utils import logging # Remember which deprecation warnings have been printed already. _PRINTED_WARNING = {} @@ -63,7 +65,7 @@ def wrapper(wrapped, instance, args, kwargs): msg = msg + " " + explanation # Display the deprecated warning. - nemo.logging.warning(msg) + logging.warning(msg) # Call the function. 
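Because AppState is built on the Singleton metaclass, the registries above are shared by the whole process; a short sketch follows, where the looked-up module name is a hypothetical placeholder.

# AppState is a singleton: every call returns the same object and the same registries.
from nemo.utils.app_state import AppState

state_a, state_b = AppState(), AppState()
assert state_a is state_b

print(state_a.active_graph)           # the graph currently recording steps and tensors
encoder = state_a.modules["encoder"]  # look a module up by its registered (hypothetical) name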
return wrapped(*args, **kwargs) diff --git a/nemo/utils/decorators/port_docs.py b/nemo/utils/decorators/port_docs.py new file mode 100644 index 000000000000..731ce27f619c --- /dev/null +++ b/nemo/utils/decorators/port_docs.py @@ -0,0 +1,89 @@ +# Copyright (C) NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# The "add_port_docs" decorator is needed to nicely generate neural types in Sphynx for input and output ports + +__all__ = [ + 'add_port_docs', +] + +import functools +import sys + +import wrapt + + +def _normalize_docstring(docstring): + """Normalizes the docstring. + Replaces tabs with spaces, removes leading and trailing blanks lines, and + removes any indentation. + Copied from PEP-257: + https://www.python.org/dev/peps/pep-0257/#handling-docstring-indentation + Args: + docstring: the docstring to normalize + Returns: + The normalized docstring + """ + if not docstring: + return '' + # Convert tabs to spaces (following the normal Python rules) + # and split into a list of lines: + lines = docstring.expandtabs().splitlines() + # Determine minimum indentation (first line doesn't count): + # (we use sys.maxsize because sys.maxint doesn't exist in Python 3) + indent = sys.maxsize + for line in lines[1:]: + stripped = line.lstrip() + if stripped: + indent = min(indent, len(line) - len(stripped)) + # Remove indentation (first line is special): + trimmed = [lines[0].strip()] + if indent < sys.maxsize: + for line in lines[1:]: + trimmed.append(line[indent:].rstrip()) + # Strip off trailing and leading blank lines: + while trimmed and not trimmed[-1]: + trimmed.pop() + while trimmed and not trimmed[0]: + trimmed.pop(0) + # Return a single string: + return '\n'.join(trimmed) + + +def add_port_docs(wrapped=None, instance=None, value=''): + if wrapped is None: + return functools.partial(add_port_docs, value=value) + + @wrapt.decorator + def wrapper(wrapped, instance=None, args=None, kwargs=None): + return wrapped(*args, **kwargs) + + decorated = wrapper(wrapped) + try: + port_2_ntype = decorated(instance) + except: + port_2_ntype = None + + port_description = "" + if port_2_ntype is not None: + for port, ntype in port_2_ntype.items(): + port_description += "* *" + port + "* : " + str(ntype) + port_description += "\n\n" + + __doc__ = _normalize_docstring(wrapped.__doc__) + '\n\n' + str(port_description) + __doc__ = _normalize_docstring(__doc__) + + wrapt.FunctionWrapper.__setattr__(decorated, "__doc__", __doc__) + + return decorated diff --git a/nemo/utils/exp_logging.py b/nemo/utils/exp_logging.py index 3af7e8d93139..fd3a0540ffe2 100644 --- a/nemo/utils/exp_logging.py +++ b/nemo/utils/exp_logging.py @@ -1,18 +1,24 @@ # Copyright (c) 2019 NVIDIA Corporation -import logging import os import subprocess import sys import time from shutil import copyfile -import nemo +from nemo.utils import logging from nemo.utils.decorators import deprecated -@deprecated(version=0.11, explanation="Please use nemo.logging instead") +# from nemo.utils import logging +@deprecated( + 
version=0.11, + explanation=( + "Please use nemo.logging instead by using from nemo.utils import logging and logging.info(), " + "logging.warning() , etc." + ), +) def get_logger(unused): - return nemo.logging + return logging # class ContextFilter(logging.Filter): @@ -92,7 +98,6 @@ def __init__( ): self.local_rank = local_rank if local_rank is not None else 0 self.global_rank = global_rank if global_rank is not None else 0 - self.logger = None self.log_file = None self.tb_writer = None self.work_dir = None @@ -118,12 +123,15 @@ def __init__( # Create work_dir if specified if work_dir: self.work_dir = work_dir + # only create tm_sur dir if checkpoints dir is not present in the work_dir if add_time: self.work_dir = os.path.join(work_dir, tm_suf) self.make_dir(self.work_dir, exist_ok) + self.ckpt_dir = os.path.join(self.work_dir, 'checkpoints') + if use_tb: self.get_tb_writer(exist_ok=exist_ok) - self.ckpt_dir = f'{self.work_dir}/checkpoints' + if files_to_copy and self.global_rank == 0: for file in files_to_copy: basename = os.path.basename(file) @@ -144,34 +152,19 @@ def __init__( f.write(get_git_diff()) # Create loggers - self.create_logger(log_file=bool(work_dir)) + if bool(work_dir): + self.add_file_handler_to_logger() if use_tb and not work_dir: - raise ValueError("ExpManager received use_tb as True but did not " "receive a work_dir") + raise ValueError("ExpManager received use_tb as True but did not receive a work_dir") if ckpt_dir: self.ckpt_dir = ckpt_dir if self.ckpt_dir: self.make_dir(self.ckpt_dir, exist_ok) - def create_logger(self, level=logging.INFO, log_file=True): - logger = nemo.logging - # tmp = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') - - # if self.global_rank == 0: - # logger.setLevel(level) - # ch = logging.StreamHandler() - # ch.setLevel(level) - # ch.setFormatter(tmp) - # logger.addHandler(ch) - - if log_file: - self.log_file = f'{self.work_dir}/log_globalrank-{self.global_rank}_' f'localrank-{self.local_rank}.txt' - logger.add_file_handler(self.log_file) - # fh = logging.FileHandler(self.log_file) - # fh.setLevel(level) - # fh.setFormatter(tmp) - self.logger = logger - return logger + def add_file_handler_to_logger(self): + self.log_file = f'{self.work_dir}/log_globalrank-{self.global_rank}_' f'localrank-{self.local_rank}.txt' + logging.add_file_handler(self.log_file) def make_dir(self, dir_, exist_ok): # We might want to limit folder creation to only global_rank 0 @@ -192,19 +185,16 @@ def get_tb_writer(self, tb_dir=None, exist_ok=True): self.tb_writer = SummaryWriter(self.tb_dir) except ImportError: self.tb_writer = None - nemo.logging.info('Not using TensorBoard.') - nemo.logging.info('Install tensorboardX to use TensorBoard') + logging.info('Not using TensorBoard.') + logging.info('Install tensorboardX to use TensorBoard') return self.tb_writer def log_exp_info(self, params, print_everywhere=False): if print_everywhere or self.global_rank == 0: - nemo.logging.info("NEMO MODEL'S PARAMETERS") + logging.info("NEMO MODEL'S PARAMETERS") for key in params: - nemo.logging.info(f'{key}\t{params[key]}') - nemo.logging.info(f'Experiment output is stored in {self.work_dir}') - - def reset_loggers(self): - nemo.logging.handlers = [] + logging.info(f'{key}\t{params[key]}') + logging.info(f'Experiment output is stored in {self.work_dir}') def get_git_hash(): diff --git a/nemo/utils/formatters/base.py b/nemo/utils/formatters/base.py index 6b844877b185..12500477b9c8 100644 --- a/nemo/utils/formatters/base.py +++ b/nemo/utils/formatters/base.py @@ -126,3 
+126,9 @@ def format(self, record): class BaseNeMoFormatter(BaseFormatter): DEFAULT_FORMAT = "%(color)s[NeMo %(levelname)1.1s %(asctime)s %(module)s:%(lineno)d]%(end_color)s %(message)s" + + +class DebugNeMoFormatter(BaseFormatter): + DEFAULT_FORMAT = ( + "%(color)s[NeMo %(levelname)1.1s %(asctime)s %(module)s:%(lineno)d rank:%(rank)d]%(end_color)s %(message)s" + ) diff --git a/nemo/utils/helpers.py b/nemo/utils/helpers.py index 0a20a05f5a7e..b2713766320d 100644 --- a/nemo/utils/helpers.py +++ b/nemo/utils/helpers.py @@ -2,7 +2,6 @@ import functools import glob import os -import tarfile from collections.abc import Iterable from pathlib import Path from typing import Optional @@ -11,6 +10,9 @@ import wget import nemo +from nemo.utils import logging + +# from nemo.utils import logging def rgetattr(obj, attr, *args): @@ -121,42 +123,51 @@ def get_cuda_device(placement): # placement=device) -def maybe_download_from_cloud(url, filename) -> str: +def maybe_download_from_cloud(url, filename, subfolder=None, cache_dir=None, referesh_cache=False) -> str: """ Helper function to download pre-trained weights from the cloud Args: url: (str) URL of storage - filename: (str) what to download. The request will be issued - to url/filename or url/filename.tar.gz + filename: (str) what to download. The request will be issued to url/filename + subfolder: (str) subfolder within cache_dir. The file will be stored in cache_dir/subfolder. Subfolder can + be empty + cache_dir: (str) a cache directory where to download. If not present, this function will attempt to create it. + If None (default), then it will be $HOME/.cache/torch/NeMo + referesh_cache: (bool) if True and cached file is present, it will delete it and re-fetch Returns: - If successful - absolute local path to the directory where - checkpoints are + If successful - absolute local path to the downloaded file else - empty string """ - try: - nfname = ".nemo_files" - # check if ~/.nemo_files exists, if not - create - home_folder = Path.home() - nf_absname = os.path.join(home_folder, nfname) - if not os.path.exists(nf_absname): - os.mkdir(nf_absname) - # check if thing is already downloaded and unpacked - if filename.endswith('.tar.gz'): - name = filename[:-7] - else: - name = filename - destination = os.path.join(nf_absname, name) - if os.path.exists(destination): - return str(destination) - # download file - wget.download(url + name + ".tar.gz", str(nf_absname)) - tf = tarfile.open(os.path.join(nf_absname, name + ".tar.gz")) - tf.extractall(nf_absname) - if os.path.exists(destination): - return destination + # try: + if cache_dir is None: + cache_location = Path.joinpath(Path.home(), '.cache/torch/NeMo') + else: + cache_location = cache_dir + if subfolder is not None: + destination = Path.joinpath(cache_location, subfolder) + else: + destination = cache_location + + if not os.path.exists(destination): + os.makedirs(destination, exist_ok=True) + + destination_file = Path.joinpath(destination, filename) + + if os.path.exists(destination_file): + logging.info(f"Found existing object {destination_file}.") + if referesh_cache: + logging.info("Asked to refresh the cache.") + logging.info(f"Deleting file: {destination_file}") + os.remove(destination_file) else: - return "" - except (FileNotFoundError, ConnectionError, OSError): - nemo.logging.info(f"Could not obtain {filename} from the cloud") + logging.info(f"Re-using file from: {destination_file}") + return str(destination_file) + # download file + wget_uri = url + filename + logging.info(f"Downloading 
from: {wget_uri} to {str(destination_file)}") + wget.download(wget_uri, str(destination_file)) + if os.path.exists(destination_file): + return destination_file + else: return "" diff --git a/nemo/utils/lr_policies.py b/nemo/utils/lr_policies.py index a3a9319324dd..7bb76113930e 100644 --- a/nemo/utils/lr_policies.py +++ b/nemo/utils/lr_policies.py @@ -1,11 +1,14 @@ # Copyright (c) 2019 NVIDIA Corporation __all__ = [ 'WarmupPolicy', + 'WarmupHoldPolicy', 'SquareAnnealing', 'CosineAnnealing', 'WarmupAnnealing', 'InverseSquareRootAnnealing', 'SquareRootAnnealing', + 'PolynomialDecayAnnealing', + 'PolynomialHoldDecayAnnealing', ] import inspect @@ -75,6 +78,53 @@ def _get_lr(self, initial_lr, step, epoch): return initial_lr +class WarmupHoldPolicy(WarmupPolicy): + """Variant of WarmupPolicy which maintains high learning rate for a defined number of steps. + + All arguments should be passed as kwargs for clarity, + + Args: + warmup_steps: Number of training steps in warmup stage + warmup_ratio: Ratio of warmup steps to total steps + hold_steps: Number of training steps to hold the learning rate after warm up + hold_ratio: Ratio of hold steps to total steps + total_steps: Total number of steps while training or `None` for + infinite training + + """ + + def __init__( + self, *, warmup_steps=None, warmup_ratio=None, hold_steps=None, hold_ratio=None, total_steps=None, min_lr=0.0 + ): + assert not (hold_steps is not None and hold_ratio is not None), "Either use particular number of step or ratio" + assert hold_ratio is None or total_steps is not None, "If there is a ratio, there should be a total steps" + super().__init__(warmup_steps=warmup_steps, warmup_ratio=warmup_ratio, total_steps=total_steps) + + self._min_lr = min_lr + self._last_warmup_lr = 0.0 + + if hold_steps is not None: + self.hold_steps = hold_steps + self.warmup_steps + elif hold_ratio is not None: + self.hold_steps = int(hold_ratio * total_steps) + self.warmup_steps + else: + self.hold_steps = 0 + + def __call__(self, initial_lr, step, epoch): + # Warmup phase + if step < self.warmup_steps: + warmup_lr = initial_lr * (step + 1) / (self.warmup_steps + 1) + return warmup_lr + + # Hold phase + if (step >= self.warmup_steps) and (step < self.hold_steps): + return initial_lr + + if step > self.total_steps: + return self._min_lr + return self._get_lr(initial_lr, step, epoch) + + def _squareroot_annealing(initial_lr, step, total_steps, min_lr): mult = ((total_steps - step) / total_steps) ** 0.5 out_lr = initial_lr * mult @@ -95,6 +145,18 @@ def _cosine_annealing(initial_lr, step, total_steps, min_lr): return out_lr +def _poly_decay(initial_lr, step, decay_steps, power, min_lr, cycle): + if cycle: + multiplier = 1.0 if step == 0 else math.ceil(step / decay_steps) + decay_steps *= multiplier + else: + step = min(step, decay_steps) + p = step / decay_steps + lr = (initial_lr - min_lr) * math.pow(1.0 - p, power) + lr += min_lr + return lr + + class SquareAnnealing(WarmupPolicy): def __init__(self, total_steps, min_lr=1e-5, **kwargs): super().__init__(total_steps=total_steps, **kwargs) @@ -162,6 +224,42 @@ def _get_lr(self, initial_lr, step, epoch): return out_lr +class PolynomialDecayAnnealing(WarmupPolicy): + def __init__(self, total_steps, min_lr=0.0, power=1.0, cycle=False, **kwargs): + super().__init__(total_steps=total_steps, **kwargs) + self.min_lr = min_lr + self.power = power + self.cycle = cycle + + def _get_lr(self, initial_lr, step, epoch): + return _poly_decay( + initial_lr, + step=step - self.warmup_steps, + 
decay_steps=self.total_steps - self.warmup_steps, + power=self.power, + min_lr=self.min_lr, + cycle=self.cycle, + ) + + +class PolynomialHoldDecayAnnealing(WarmupHoldPolicy): + def __init__(self, total_steps, min_lr=0.0, power=1.0, cycle=False, **kwargs): + super().__init__(total_steps=total_steps, **kwargs) + self.min_lr = min_lr + self.power = power + self.cycle = cycle + + def _get_lr(self, initial_lr, step, epoch): + return _poly_decay( + initial_lr, + step=step - self.hold_steps, + decay_steps=self.total_steps - max(self.warmup_steps, self.hold_steps), + power=self.power, + min_lr=self.min_lr, + cycle=self.cycle, + ) + + def get_all_lr_classes(): """ Get all LR classes defined within this module """
diff --git a/nemo/utils/metaclasses.py b/nemo/utils/metaclasses.py index 0f584aa76cad..ead4b4560531 100644 --- a/nemo/utils/metaclasses.py +++ b/nemo/utils/metaclasses.py @@ -12,18 +12,27 @@ # See the License for the specific language governing permissions and # limitations under the License.**** -__all__ = [ - "SingletonMetaClass", -] +import threading -class SingletonMetaClass(type): +class Singleton(type): + """ Implementation of a generic, thread-safe singleton meta-class. + Can be used as a meta-class, i.e. a class using it will create a single, shared instance. + """ - _instances = {} + # List of instances - one per class. + __instances = {} + # Lock used for accessing the instance. + __lock = threading.Lock() def __call__(cls, *args, **kwargs): - - if cls not in cls._instances: - cls._instances[cls] = super(SingletonMetaClass, cls).__call__(*args, **kwargs) - - return cls._instances[cls] + """ Returns singleton instance. A thread safe implementation. """ + if cls not in cls.__instances: + # Enter critical section. + with cls.__lock: + # Check once again. + if cls not in cls.__instances: + # Create a new object instance - one per class. + cls.__instances[cls] = super(Singleton, cls).__call__(*args, **kwargs) + # Return the instance. + return cls.__instances[cls]
diff --git a/nemo/utils/nemo_logging.py b/nemo/utils/nemo_logging.py index 4e49028d0b6c..7fed7ff0c5c3 100644 --- a/nemo/utils/nemo_logging.py +++ b/nemo/utils/nemo_logging.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License.**** +import enum import logging as _logging import sys import threading @@ -19,17 +20,20 @@ from contextlib import contextmanager # from nemo.constants import NEMO_ENV_VARNAME_SAVE_LOGS_TO_DIR -from nemo.constants import NEMO_ENV_VARNAME_REDIRECT_LOGS_TO_STDERR +from nemo.constants import NEMO_ENV_VARNAME_REDIRECT_LOGS_TO_STDERR, NEMO_ENV_VARNAME_TESTING from nemo.utils.env_var_parsing import get_envbool, get_envint -from nemo.utils.formatters.base import BaseNeMoFormatter -from nemo.utils.metaclasses import SingletonMetaClass +from nemo.utils.formatters.base import BaseNeMoFormatter, DebugNeMoFormatter +from nemo.utils.metaclasses import Singleton -__all__ = [ - "Logger", -] +__all__ = ["Logger", "LogMode"] -class Logger(metaclass=SingletonMetaClass): +class LogMode(enum.IntEnum): + EACH = 0 # Log the message each time + ONCE = 1 # Log the message only once. The same message will not be logged again. + + +class Logger(metaclass=Singleton): # Level 0 NOTSET = _logging.NOTSET @@ -71,6 +75,8 @@ def __init__(self): self._define_logger() + self.once_logged = set() + def _define_logger(self): # Use double-checked locking to avoid taking lock unnecessarily.
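# Illustrative sketch (not from the patch): any class that sets `metaclass=Singleton`
# gets one lazily created, shared instance, even when constructed concurrently from
# several threads. `AppState` is a hypothetical class used only for this example.
from nemo.utils.metaclasses import Singleton


class AppState(metaclass=Singleton):
    def __init__(self):
        self.settings = {}


assert AppState() is AppState()  # repeated construction returns the same instance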
@@ -82,7 +88,17 @@ def _define_logger(self): self._logger = _logging.getLogger("nemo_logger") # By default, silence all loggers except the logger for rank 0 self.remove_stream_handlers() - if get_envint("RANK", 0) == 0: + if get_envbool(NEMO_ENV_VARNAME_TESTING, False): + old_factory = _logging.getLogRecordFactory() + + def record_factory(*args, **kwargs): + record = old_factory(*args, **kwargs) + record.rank = get_envint("RANK", 0) + return record + + _logging.setLogRecordFactory(record_factory) + self.add_stream_handlers(formatter=DebugNeMoFormatter) + elif get_envint("RANK", 0) == 0: self.add_stream_handlers() finally: @@ -106,7 +122,7 @@ def remove_stream_handlers(self): except KeyError: pass - def add_stream_handlers(self): + def add_stream_handlers(self, formatter=BaseNeMoFormatter): if self._logger is None: raise RuntimeError("Impossible to set handlers if the Logger is not predefined") @@ -121,8 +137,6 @@ def add_stream_handlers(self): self._handlers["stream_stderr"] = _logging.StreamHandler(sys.stderr) self._handlers["stream_stderr"].addFilter(lambda record: record.levelno > _logging.INFO) - formatter = BaseNeMoFormatter - self._handlers["stream_stdout"].setFormatter(formatter()) self._logger.addHandler(self._handlers["stream_stdout"]) @@ -132,9 +146,9 @@ def add_stream_handlers(self): except KeyError: pass - def reset_stream_handler(self): + def reset_stream_handler(self, formatter=BaseNeMoFormatter): self.remove_stream_handlers() - self.add_stream_handlers() + self.add_stream_handlers(formatter=formatter) def add_file_handler(self, log_file): if self._logger is None: @@ -198,6 +212,39 @@ def patch_stderr_handler(self, stream): else: raise RuntimeError("Impossible to patch logging handlers if handler does not exist") + @contextmanager + def patch_stdout_handler(self, stream): + """ Useful for unittests + """ + if self._logger is not None: + try: + old_stream = self._handlers["stream_stdout"].stream + if old_stream is None: + raise ValueError + + # Port backwards set_stream() from python 3.7 + self._handlers["stream_stdout"].acquire() + try: + self._handlers["stream_stdout"].flush() + self._handlers["stream_stdout"].stream = stream + finally: + self._handlers["stream_stdout"].release() + + yield stream + except (KeyError, ValueError): + raise RuntimeError("Impossible to patch logging handlers if handler does not exist") + finally: + # Port backwards set_stream() from python 3.7 + self._handlers["stream_stdout"].acquire() + try: + self._handlers["stream_stdout"].flush() + self._handlers["stream_stdout"].stream = old_stream + finally: + self._handlers["stream_stdout"].release() + + else: + raise RuntimeError("Impossible to patch logging handlers if handler does not exist") + @contextmanager def temp_verbosity(self, verbosity_level): """Sets the a temporary threshold for what messages will be logged.""" @@ -248,7 +295,15 @@ def _showwarning(self, message, category, filename, lineno, line=None): s = warnings.formatwarning(message, category, filename, lineno, line) self.warning("%s", s) - def debug(self, msg, *args, **kwargs): + def _logged_once(self, msg, mode): + PREFIX_LEN = 12 + if mode == LogMode.ONCE: + if msg[PREFIX_LEN:] in self.once_logged: + return True + self.once_logged.add(msg[PREFIX_LEN:]) + return False + + def debug(self, msg, *args, mode=LogMode.EACH, **kwargs): """ Log 'msg % args' with severity 'DEBUG'. 
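# Illustrative sketch (not from the patch): with the new `mode` keyword, a message
# logged with LogMode.ONCE is emitted on the first call only; later repeats of the
# same message are filtered out via `once_logged`. Assumes `nemo.utils.logging` is
# the shared Logger instance, as it is used elsewhere in this patch.
from nemo.utils import logging
from nemo.utils.nemo_logging import LogMode

for step in range(3):
    logging.warning("dataset contains no validation split", mode=LogMode.ONCE)  # printed once
    logging.info("finished step %d", step)  # printed on every iteration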
@@ -257,10 +312,10 @@ def debug(self, msg, *args, **kwargs): logger.debug("Houston, we have a %s", "thorny problem", exc_info=1) """ - if self._logger is not None and self._logger.isEnabledFor(Logger.DEBUG): + if self._logger is not None and self._logger.isEnabledFor(Logger.DEBUG) and not self._logged_once(msg, mode): self._logger._log(Logger.DEBUG, msg, args, **kwargs) - def info(self, msg, *args, **kwargs): + def info(self, msg, *args, mode=LogMode.EACH, **kwargs): """ Log 'msg % args' with severity 'INFO'. @@ -269,10 +324,10 @@ def info(self, msg, *args, **kwargs): logger.info("Houston, we have a %s", "interesting problem", exc_info=1) """ - if self._logger is not None and self._logger.isEnabledFor(Logger.INFO): + if self._logger is not None and self._logger.isEnabledFor(Logger.INFO) and not self._logged_once(msg, mode): self._logger._log(Logger.INFO, msg, args, **kwargs) - def warning(self, msg, *args, **kwargs): + def warning(self, msg, *args, mode=LogMode.EACH, **kwargs): """ Log 'msg % args' with severity 'WARNING'. @@ -281,10 +336,10 @@ def warning(self, msg, *args, **kwargs): logger.warning("Houston, we have a %s", "bit of a problem", exc_info=1) """ - if self._logger is not None and self._logger.isEnabledFor(Logger.WARNING): + if self._logger is not None and self._logger.isEnabledFor(Logger.WARNING) and not self._logged_once(msg, mode): self._logger._log(Logger.WARNING, msg, args, **kwargs) - def error(self, msg, *args, **kwargs): + def error(self, msg, *args, mode=LogMode.EACH, **kwargs): """ Log 'msg % args' with severity 'ERROR'. @@ -293,10 +348,10 @@ def error(self, msg, *args, **kwargs): logger.error("Houston, we have a %s", "major problem", exc_info=1) """ - if self._logger is not None and self._logger.isEnabledFor(Logger.ERROR): + if self._logger is not None and self._logger.isEnabledFor(Logger.ERROR) and not self._logged_once(msg, mode): self._logger._log(Logger.ERROR, msg, args, **kwargs) - def critical(self, msg, *args, **kwargs): + def critical(self, msg, *args, mode=LogMode.EACH, **kwargs): """ Log 'msg % args' with severity 'CRITICAL'. @@ -305,9 +360,9 @@ def critical(self, msg, *args, **kwargs): logger.critical("Houston, we have a %s", "major disaster", exc_info=1) """ - if self._logger is not None and self._logger.isEnabledFor(Logger.CRITICAL): + if ( + self._logger is not None + and self._logger.isEnabledFor(Logger.CRITICAL) + and not self._logged_once(msg, mode) + ): self._logger._log(Logger.CRITICAL, msg, args, **kwargs) - - -# # Necessary to catch the correct caller -# _logging._srcfile = os.path.normcase(inspect.getfile(Logger.__class__)) diff --git a/nemo/utils/neural_graph/connection.py b/nemo/utils/neural_graph/connection.py new file mode 100644 index 000000000000..e181f54d9876 --- /dev/null +++ b/nemo/utils/neural_graph/connection.py @@ -0,0 +1,30 @@ +# -*- coding: utf-8 -*- + +# ============================================================================= +# Copyright (c) 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================= + + +from collections import namedtuple + +# Tuple used for storing "step number", "module name" and "port name". +# (used in NmTensor's producer/consumer, port binding etc.). +# Module name is redundant, as it can be recovered from the step number. +StepModulePort = namedtuple('StepModulePort', ["step_number", "module_name", "port_name"]) + + +# Tuple used for a connection between a single producer and a single consumer. +# (used in NmTensor's producer/consumer, port binding etc.). +Connection = namedtuple('Connection', ["producer", "consumer", "ntype"])
diff --git a/nemo/utils/neural_graph/graph_inputs.py b/nemo/utils/neural_graph/graph_inputs.py new file mode 100644 index 000000000000..aa18bde8fc29 --- /dev/null +++ b/nemo/utils/neural_graph/graph_inputs.py @@ -0,0 +1,226 @@ +# -*- coding: utf-8 -*- + +# ============================================================================= +# Copyright (c) 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +from collections.abc import MutableMapping +from typing import Dict, List, Optional, Union + +from frozendict import frozendict + +from nemo.utils import logging +from nemo.utils.neural_graph.connection import StepModulePort + + +class GraphInput(object): + """ A helper class representing a single bound input. """ + + def __init__(self, ntype: "NeuralType"): + """ + Initializes object. + + Args: + ntype: a NeuralType object. + """ + # (Neural) Type of input. + self._ntype = ntype + # List of StepModulePort tuples to which this input links (step number, module name, port name). + self._consumers = [] + + def bind(self, step_module_ports: Union[StepModulePort, List[StepModulePort]]): + """ Binds the (step-module-ports) to this "graph input". + Adds "consumers" of this graph input (modules attached to this port), + so that when the NmTensor is actually passed, those modules will be connected. + + Args: + step_module_ports: A single StepModulePort OR a list of StepModulePort tuples to be added. + """ + # Handle both single port and lists of ports to be bound. + if type(step_module_ports) is not list: + step_module_ports = [step_module_ports] + # Iterate through "consumers" on the list and add them to the bound input. + for smp in step_module_ports: + self._consumers.append(smp) + + @property + def ntype(self) -> "NeuralType": + """ + Returns: + NeuralType of a given input. + """ + return self._ntype + + @property + def consumers(self) -> List[StepModulePort]: + """ + Returns: + List of bound modules, i.e. (step number, module name, port name) tuples. + """ + return self._consumers + + +class GraphInputs(MutableMapping): + ''' + A specialized dictionary that contains bound inputs of a Neural Graph. + ''' + + def __init__(self): + """ + Initializes an empty dictionary.
+ """ + self._inputs = {} + + def __setitem__(self, key: str, value: Union["NeuralType", GraphInput]): + """ + This method is used to "create" a bound input, i.e. copy definition from indicated module input port. + + Args: + key: name of the input port of the Neural Graph. + value: NeuralType (or GraphInput) that will be set. + + Raises: + KeyError: Definition of a previously bound port is not allowed. + TypeError: Port definition must be must be a NeuralType or GraphInput type. + """ + # Make sure that a proper object was passed here. + if type(value).__name__ == "NeuralType": + ntype = value + elif isinstance(value, GraphInput): + ntype = value.ntype + else: + raise TypeError("Port `{}` definition must be must be a NeuralType or GraphInput type".format(key)) + + if key in self._inputs.keys(): + if self._inputs[key].ntype == ntype: + raise KeyError("Overwriting definition of a previously bound port `{}` is not allowed".format(key)) + # Else: do nothing. + else: + # Ok, add definition to list of mapped (module, port)s. + # Note: for now, there are no mapped modules, so copy only the (neural) type. + self._inputs[key] = GraphInput(ntype=ntype) + + def __getitem__(self, key: str) -> GraphInput: + """ + Returns the bound input associated with the given key. + + Args: + key: Name of the bound input. + """ + return self._inputs[key] + + def __delitem__(self, key: str): + """ + Raises: + TypeError as deletion of a bound input port is not allowed. + """ + raise TypeError("Deletion of a bound input port is not allowed") + + def __iter__(self): + """ + Returns: + Iterator over the dict of bound inputs. + """ + return iter(self._inputs) + + def __len__(self) -> int: + """ + Return: + The number of bound inputs. + """ + return len(self._inputs) + + @property + def definitions(self) -> Dict[str, "NeuralType"]: + """ + Property returns definitions of the input ports by extracting them on the fly from list. + + ..info: + This property actually returns a FrozenDict containing port definitions to indicate that + port definitions SHOULD not be used during the actual binding. + + Returns: + Dictionary of neural types associated with bound inputs. + """ + # Extract port definitions (Neural Types) from the inputs list. + return frozendict({k: v.ntype for k, v in self._inputs.items()}) + + def has_binding(self, step_number: int, port_name: str) -> Optional[str]: + """ + Checks if there is a binding leading to a given step number (module) and its given port. + (module name is redundant, thus skipped in this test). + + Returns: + key in the list of the (bound) input ports that leads to a given step (module)/port + or None if the binding was not found. + """ + for key, binding in self._inputs.items(): + for (step, _, port) in binding.consumers: + if step == step_number and port == port_name: + return key + # Binding not found. + return None + + def serialize(self) -> List[str]: + """ Method responsible for serialization of the graph inputs. + + Returns: + List containing mappings (input -> step.module.input_port). + """ + serialized_inputs = [] + # Iterate through "bindings" (GraphInputs). + for key, binding in self._inputs.items(): + # Get type. + ntype_str = str(binding.ntype) + for (step, module, port) in binding.consumers: + # Serialize: input -> step.module.port | ntype + target = str(step) + "." + module + "." + port + # Serialize! + serialized_inputs.append(key + "->" + target + " | " + ntype_str) + # Return the result. 
+ return serialized_inputs + + @classmethod + def deserialize(cls, serialized_inputs: List[str], modules: Dict[str, 'NeuralModule']): + """ + Class method responsible for deserialization of graph inputs. + + Args: + serialized_inputs: A list of serialized inputs in the form of ("input->module.input_port") + modules: List of modules required for neural type copying/checking. + + Returns: + Dictionary with deserialized inputs. + """ + inputs = GraphInputs() + # Iterate through serialized inputs one by one. + for i in serialized_inputs: + # Deserialize! + [key, consumer_ntype] = i.split("->") + [consumer, ntype_str] = consumer_ntype.split(" | ") + [consumer_step, consumer_name, consumer_port_name] = consumer.split(".") + # Add the input. + if key not in inputs.keys(): + # Get neural type from module input port definition. + ntype = modules[consumer_name].input_ports[consumer_port_name] + # Make sure the graph bound port type matches the deserialized type. + assert ntype_str == str(ntype) + + # Create a new input. + inputs[key] = ntype + # Bind the "consumers". + inputs[key].bind(StepModulePort(int(consumer_step), consumer_name, consumer_port_name)) + # Done. + return inputs diff --git a/nemo/utils/neural_graph/graph_outputs.py b/nemo/utils/neural_graph/graph_outputs.py new file mode 100644 index 000000000000..6f14c6848cb8 --- /dev/null +++ b/nemo/utils/neural_graph/graph_outputs.py @@ -0,0 +1,302 @@ +# -*- coding: utf-8 -*- + +# ============================================================================= +# Copyright (c) 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +from collections.abc import MutableMapping +from typing import Any, Dict, List, Optional + +from frozendict import frozendict + +from nemo.utils import logging +from nemo.utils.neural_graph.connection import StepModulePort + + +class GraphOutput(object): + """ A helper class represenging a single bound output. """ + + def __init__(self, ntype: "NeuralType", producer_step_module_port: StepModulePort): + """ + Initializes object. + + Args: + ntype: a NeuralType object. + producer_step_module_port: a producer StepModulePort tuple (step number (module name), port name). + """ + self._ntype = ntype + self._producer_step_module_port = producer_step_module_port + + @property + def ntype(self) -> "NeuralType": + """ + Returns: + NeuralType of a given output. + """ + return self._ntype + + @property + def producer_step_module_port(self) -> StepModulePort: + """ Returns producer step port (step number (module), port name) tuple. """ + return self._producer_step_module_port + + +class GraphOutputs(MutableMapping): + ''' + A specialized dictionary that contains bound outputs of a Neural Graph. + In fact stores two lists of "outputs": + - "default" outputs with default keys taken from outputs of modules, and + - "manual" used for specifying the subset of outputs. 
+ When accessing the outputs, it returns one of those two lists, following the rule: + return the "manual" outputs if they were defined (at least one manual output defined by the user), + otherwise return the "default" outputs. + ''' + + def __init__(self, tensors_ref): + """ + Initializes two (empty) dictionaries. + + Args: + tensors_ref - reference to the neural graph's tensors (dict of dicts). + """ + + # Tensors[step][output_port_name] passed from the external neural graph object. + self._tensors_ref = tensors_ref + + # This dictionary stores the bound outputs collected during the "default" recording of produced tensors. + # As they are using the default port names, the second/next tensor published on the same port + # will generate a new unique name following the (step_number.module.port_name) pattern. + self._default_outputs = {} + + # This dictionary stores the list of outputs of modules "manually" bound by the user. + # In this case, trying to overwrite the existing ports with new tensors is forbidden (raises an exception). + self._manual_outputs = {} + + def __setitem__(self, key: str, value: "NmTensor"): + """ + This method is used to set the manual output - creates a GraphOutput item and adds it to the list. + + Args: + key: The name of the output (port). + value: NmTensor that will be used to create a given GraphOutput. + """ + # Make sure that user passed a NmTensor. + if type(value).__name__ != "NmTensor": + raise TypeError("Port `{}` definition must be set using a NmTensor".format(key)) + + if key in self._manual_outputs.keys(): + raise KeyError("Overwriting of a port `{}` that was previously manually bound is not allowed".format(key)) + + # Ok, set the "manual" output. + self._manual_outputs[key] = GraphOutput(value.ntype, value.producer_step_module_port) + + def __getitem__(self, key: str) -> GraphOutput: + """ + Returns the bound output associated with the given key. + Uses the default or manual dict depending on whether there are any manual outputs. + + Args: + key: Name of the bound output. + """ + if len(self._manual_outputs) > 0: + return self._manual_outputs[key] + else: # Use default dict. + return self._default_outputs[key] + + def __delitem__(self, key: str): + """ + Raises: + TypeError as deletion of a bound output port is not allowed. + """ + raise TypeError("Deleting a bound output is not allowed") + + def __iter__(self): + """ + Returns: + Iterator over the outputs - depending on whether there are any manual outputs. + """ + if len(self._manual_outputs) > 0: + return iter(self._manual_outputs) + else: # Use default dict. + return iter(self._default_outputs) + + def __len__(self) -> int: + """ + Returns: + The number of outputs - depending on whether there are any manual outputs. + """ + if len(self._manual_outputs) > 0: + return len(self._manual_outputs) + else: # Use default dict. + return len(self._default_outputs) + + def bind(self, tensors_ref: List["NmTensor"], port_names: Optional[str] = None): + """ + Binds the "default" outputs. + + Args: + tensors_ref: List of tensors to be added. + port_names: List of port names (visible outside). If None: using internal tensor "output port names". + """ + # Set names. + if port_names is None: + port_names = [tensor.name for tensor in tensors_ref] + + for name, tensor in zip(port_names, tensors_ref): + # Check the presence of the port name in the "default" dictionary. + if name in self._default_outputs.keys(): + # Name present - use a name that is the combination of the producer and port names.
+ name = ( + str(tensor.producer_step_number) + "_" + tensor.producer_name + "_" + tensor.name + ) # last = port name + + logging.debug( + "Setting unique name of the default output port `{}` produced in step {} by `{}` to `{}`".format( + tensor.name, tensor.producer_step_number, tensor.producer_name, name + ) + ) + # Store the output. + self._default_outputs[name] = GraphOutput(tensor.ntype, tensor.producer_step_module_port) + + @property + def definitions(self) -> Dict[str, GraphOutput]: + """ + Property returns definitions of the output ports by extracting them on the fly from the bound outputs. + + ..info: + This property actually returns a FrozenDict containing port definitions to indicate that + port definitions SHOULD not be used during the actual binding. + + + Returns: + Dictionary of neural types associated with bound outputs. + """ + # Get the right output dictionary. + d = self._manual_outputs if len(self._manual_outputs) > 0 else self._default_outputs + + # Extract port definitions (Neural Types) and return an immutable dictionary, + # so the user won't be able to modify its content by an accident! + return frozendict({k: v.ntype for k, v in d.items()}) + + @property + def tensors(self) -> Dict[str, "NmTensor"]: + """ + Property returns output tensors by extracting them on the fly from the bound outputs. + + Returns: + Dictionary of tensors in the format (output-name: tensor). + """ + # Get the right output dictionary. + d = self._manual_outputs if len(self._manual_outputs) > 0 else self._default_outputs + + output_tensors = {} + # Get tensors by acessing the producer-ports. + # At that point all keys (k) are unigue - we made sure of that during binding/__setitem__. + for k, v in d.items(): + producer_step = v.producer_step_module_port.step_number + producer_port_name = v.producer_step_module_port.port_name + # Find the right output tensor. + tensor = self._tensors_ref[producer_step][producer_port_name] + # Add it to the dictionary. + output_tensors[k] = tensor + # Return the result as an immutable dictionary, + # so the user won't be able to modify its content by an accident! + return frozendict(output_tensors) + + @property + def tensor_list(self) -> List["NmTensor"]: + """ + Property returns output tensors by extracting them on the fly from the bound outputs. + + Returns: + List of tensors. + + """ + # Get the right output dictionary. + d = self._manual_outputs if len(self._manual_outputs) > 0 else self._default_outputs + + output_tensor_list = [] + # Get tensors by acessing the producer-ports. + for k, v in d.items(): + producer_step = v.producer_step_module_port.step_number + producer_port_name = v.producer_step_module_port.port_name + # Find the right output tensor. + tensor = self._tensors_ref[producer_step][producer_port_name] + # Add it to the list. + output_tensor_list.append(tensor) + # Return the result. + return output_tensor_list + + def serialize(self) -> Dict[str, Any]: + """ Method responsible for serialization of the graph outputs. + + Returns: + List containing mappings (step.module.output_port -> output | ntype). + """ + serialized_outputs = {"mappings": []} + + # Get the right output dictionary. + if len(self._manual_outputs) > 0: + serialized_outputs["type"] = "manual" + d = self._manual_outputs + else: + serialized_outputs["type"] = "default" + d = self._default_outputs + + # Iterate through "bindings" (GraphOutputs). + for key, binding in d.items(): + # Serialize: step.module.port -> output | ntype. 
+ smp = binding.producer_step_module_port + source = str(smp.step_number) + "." + smp.module_name + "." + smp.port_name + # Get type. + ntype_str = str(binding.ntype) + # Serialize! + serialized_outputs["mappings"].append(source + "->" + key + " | " + ntype_str) + # Return the result. + return serialized_outputs + + def deserialize(self, serialized_outputs: Dict[str, Any], modules: Dict[str, 'NeuralModule']): + """ + Method responsible for deserialization of graph outputs. + + Args: + serialized_outputs: A list of serialized outputs in the form of ("step.module.output_port->key | ntype") + modules: List of modules required for neural type copying/checking. + """ + # Check type. + if serialized_outputs["type"] == "default": + # We still need to deserialize. + # Use-case: deserialization of a graph with nested graph with bound output. + d = self._default_outputs + else: + d = self._manual_outputs + + # Iterate through serialized inputs one by one. + for i in serialized_outputs["mappings"]: + # Deserialize! + [producer, key_ntype] = i.split("->") + [key, ntype_str] = key_ntype.split(" | ") + [step_number, producer_name, producer_port_name] = producer.split(".") + # Get neural type from module output port definition. + ntype = modules[producer_name].output_ports[producer_port_name] + + # Make sure the graph bound port type matches the deserialized type. + assert ntype_str == str(ntype) + + # Create a new input. + go = GraphOutput(ntype, StepModulePort(int(step_number), producer_name, producer_port_name)) + d[key] = go + + # Done. diff --git a/nemo/utils/neural_graph/neural_graph_manager.py b/nemo/utils/neural_graph/neural_graph_manager.py new file mode 100644 index 000000000000..b8b2e1deeb1f --- /dev/null +++ b/nemo/utils/neural_graph/neural_graph_manager.py @@ -0,0 +1,88 @@ +# -*- coding: utf-8 -*- + +# ============================================================================= +# Copyright (c) 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +# Sadly have to import the whole "nemo" python module to avoid circular dependencies. +# Moreover, at that point nemo module doesn't contain "core", so during "python module registration" +# nothing from nemo.core, including e.g. types (so we cannot use them for "python 3 type hints"). +from nemo.utils.neural_graph.object_registry import ObjectRegistry + + +class NeuralGraphManager(ObjectRegistry): + def __init__(self): + """ + Constructor. Initializes the manager. Sets active graph to None. + """ + super().__init__("graph") + self._active_graph = None + + def __eq__(self, other): + """ + Checks if two managers have the same content. + Args: + other: A second manager object. + """ + if not isinstance(other, ObjectRegistry): + return False + return super().__eq__(other) + + def summary(self) -> str: + """ + Returns: + A summary of the graphs on the list. + """ + # Line "decorator". 
+ summary = "\n" + 113 * '=' + "\n" + summary += "Registry of {}s:\n".format(self._base_type_name) + for graph in self: + summary += " * {} ({}) [{}]\n".format(graph.name, len(graph), graph.operation_mode) + # Line "decorator". + summary += 113 * '=' + return summary + + @property + def active_graph(self) -> "NeuralGraph": + """ + Property returns the active graph. If there is no active graph, creates a new one. + + Returns: + The active graph object. + """ + # Create a new graph - training is the default. + if self._active_graph is None: + # Import core here (to avoid circular dependency between core-utils). + from nemo.core import NeuralGraph, OperationMode + + # Create a new "default" graph. Default mode: both. + new_graph = NeuralGraph(operation_mode=OperationMode.both) + new_graph._name = self.register(new_graph, None) + # Set the newly created graph as active. + self._active_graph = new_graph + + # Return the graph. + return self._active_graph + + @active_graph.setter + def active_graph(self, graph: "NeuralGraph"): + """ + Property sets the active graph. + + Args: + graph: Neural graph object that will become active. + """ + # Activate the graph. + self._active_graph = graph diff --git a/nemo/utils/neural_graph/object_registry.py b/nemo/utils/neural_graph/object_registry.py new file mode 100644 index 000000000000..d191e940dc75 --- /dev/null +++ b/nemo/utils/neural_graph/object_registry.py @@ -0,0 +1,147 @@ +# -*- coding: utf-8 -*- +# ============================================================================= +# Copyright (c) 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +from weakref import WeakSet + + +class ObjectRegistry(WeakSet): + """ + Registry used for storing references to objects, generating unique names and monitoring their `uniqueness`. + """ + + def __init__(self, base_type_name): + """ + Stores base type name. + """ + super().__init__() + self._base_type_name = base_type_name + + def register(self, new_obj, name: str) -> str: + """ + Registers a new object using the provided name. + If name is none - generates new unique name. + + Args: + new_obj: An object to be registered. + name: A "proposition" for the object name. + + Returns: + A unique name (proposition or newly generated name). + """ + + # Check if object is already in a set. + if new_obj in self: + # Return its name. + return new_obj.name + + # Check object name. + if name is None: + # Generate a new, unique name. + unique_name = self.__generate_unique_name(new_obj) + else: + # Check if name is unique. + if self.has(name): + raise NameError("A {} with name `{}` already exists!".format(name, name)) + # Ok, it is unique. + unique_name = name + + # Finally, add object to the set. + self.add(new_obj) + + # Return the name. + return unique_name + + def has(self, name: str) -> bool: + """ + Check if registry stores object with a given name. 
+ + Args: + name: name of the object to be found in the registry. + """ + for obj in self: + if obj.name == name: + return True + # Else: + return False + + def __generate_unique_name(self, new_obj) -> str: + """ + Generates a new unique name by adding postfix (number) to base name. + + Args: + new_obj: An object to be registered. + + Returns: + A generated unique name. + """ + # Iterate through numbers. + postfix = 0 + # Get type name. + base_type_name = (type(new_obj).__name__).lower() + while True: + # Generate name. + new_name = base_type_name + str(postfix) + # Check uniqueneess. + if not self.has(new_name): + # Ok, got a unique name! + break + # Increment index. + postfix += 1 + return new_name + + def __getitem__(self, key: str): + """ + Object getter function. + + Args: + key: Object name. + + Returns: + Object associated with the key. + """ + # Search for an object with a given name. + for obj in self: + # Retrieve object + if obj.name == key: + return obj + # Else: seems that there is no object with that name. + raise KeyError("A {} with name `{}` don't exists!".format(self._base_type_name, key)) + + def __eq__(self, other): + """ + Checks if two registers have the same content. + + Args: + other: The second registry object. + """ + if not isinstance(other, WeakSet): + return False + return super().__eq__(other) + + def summary(self) -> str: + """ + Returns: + A summary of the objects on the list. + """ + # Line "decorator". + summary = "\n" + 113 * '=' + "\n" + summary += "Registry of {}s:\n".format(self._base_type_name) + for obj in self: + summary += " * {} ({})\n".format(obj.name, type(obj).__name__) + # Line "decorator". + summary += 113 * '=' + return summary diff --git a/nemo/utils/nmtensor_registry.py b/nemo/utils/nmtensor_registry.py new file mode 100755 index 000000000000..572c51fc54ae --- /dev/null +++ b/nemo/utils/nmtensor_registry.py @@ -0,0 +1,91 @@ +# ============================================================================= +# Copyright (c) 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +from weakref import WeakValueDictionary + + +class NmTensorNameRegistry: + def __init__(self): + """ + Constructor. Initializes the NmTensorNameRegistry. Reserves the default 'loss' name. + + TODO: We should be recording the tensors of each graph rather than all the tensors. 
+ """ + # Create the nmtensor_naming_dict + # which contains a mapping of str to NMTensor.unique_name + self._nmtensor_naming_dict = {"loss": "loss"} # Reserve keyname of 'loss' + # Create a dict that maps unique_names to tensors for use with TrainingState.get_tensor() + self._nmtensor_uniname_dict = WeakValueDictionary() + + @property + def unique_names(self): + """Returns the set of all NmTensors.unique_names + 'loss' + """ + return list(self._nmtensor_uniname_dict.keys()) + ["loss"] + + def register(self, tensor: 'NmTensor'): + """Helper function to register a newly created NmTensor by adding it to self.__nmtensor_uniname_dict. + Should be called from NmTensor.__init__() + + args: + tensor (NmTensor): The tensor to be registered. + """ + + # Check if object is already in a set. + if tensor.unique_name in self._nmtensor_uniname_dict: + pass + + # Finally, add object to the set. + self._nmtensor_uniname_dict[tensor.unique_name] = tensor + + def rename_NmTensor(self, tensor: 'NmTensor', new_name: str): + """Helper function that changes the naming dictionary to facilitate user name -> tensor.unique_name lookup. + + args: + tensor (NmTensor): The tensor to be renamed. + new_name (str): its new name. + """ + # Find old name if exists + old_name = tensor.unique_name + for custom_name, unique_name in self._nmtensor_naming_dict.items(): + if unique_name == tensor.unique_name: + old_name = custom_name + + if old_name != tensor.unique_name: + del self._nmtensor_naming_dict[old_name] + + if new_name in self._nmtensor_naming_dict: + raise KeyError(f"{new_name} already exists in current graph. Please use a unique name") + self._nmtensor_naming_dict[new_name] = tensor.unique_name + + def __getitem__(self, key: str): + """ + Object getter function. + + Args: + key: Object name. + + Returns: + Object associated with the key. + """ + # Search for an object with a given name. 
+ if key in self._nmtensor_naming_dict: + key = self._nmtensor_naming_dict[key] + + if key in self._nmtensor_uniname_dict or key == "loss": + return key + + raise KeyError("A NmTensor with name `{}` don't exists!".format(key)) diff --git a/requirements/requirements.txt b/requirements/requirements.txt index 87e9c5b4fd50..c955d5274812 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -1,3 +1,4 @@ +numpy>=1.18.2 onnx onnxruntime pandas @@ -8,3 +9,7 @@ torch torchvision wget wrapt +ruamel.yaml +scikit-learn +scipy + diff --git a/requirements/requirements_asr.txt b/requirements/requirements_asr.txt index 439f30fa6af1..0edb80411639 100644 --- a/requirements/requirements_asr.txt +++ b/requirements/requirements_asr.txt @@ -1,11 +1,15 @@ +braceexpand frozendict inflect kaldi-io librosa marshmallow +packaging num2words ruamel.yaml soundfile sox torch-stft -unidecode \ No newline at end of file +unidecode +webdataset +kaldi-python-io diff --git a/requirements/requirements_docs.txt b/requirements/requirements_docs.txt new file mode 100644 index 000000000000..0f7f357fb67c --- /dev/null +++ b/requirements/requirements_docs.txt @@ -0,0 +1,4 @@ +latexcodec +sphinx_rtd_theme +sphinxcontrib-bibtex +wrapt diff --git a/requirements/requirements_nlp.txt b/requirements/requirements_nlp.txt index ebdb41560653..3b31e4574fed 100644 --- a/requirements/requirements_nlp.txt +++ b/requirements/requirements_nlp.txt @@ -3,6 +3,12 @@ h5py matplotlib sentencepiece torchtext -transformers +transformers>=2.11.0 unidecode -youtokentome \ No newline at end of file +youtokentome +numpy +tqdm +rapidfuzz +gdown +megatron-lm +inflect diff --git a/requirements/requirements_simple_gan.txt b/requirements/requirements_simple_gan.txt index 8f59cf99bbac..6ccafc3f904b 100644 --- a/requirements/requirements_simple_gan.txt +++ b/requirements/requirements_simple_gan.txt @@ -1,2 +1 @@ matplotlib -torchvision \ No newline at end of file diff --git a/requirements/requirements_test.txt b/requirements/requirements_test.txt index ddd891eac3fe..0a9afca34856 100644 --- a/requirements/requirements_test.txt +++ b/requirements/requirements_test.txt @@ -1,8 +1,11 @@ +black +isort[requirements] +onnxruntime parameterized pytest pytest-runner -black -isort[requirements] +ruamel.yaml +sphinx +sphinxcontrib-bibtex wrapt wget -onnxruntime diff --git a/requirements/requirements_tts.txt b/requirements/requirements_tts.txt index c7263f06e583..3d5ac563c873 100644 --- a/requirements/requirements_tts.txt +++ b/requirements/requirements_tts.txt @@ -1,5 +1,3 @@ -librosa matplotlib pypinyin -ruamel.yaml -scipy \ No newline at end of file +attrdict diff --git a/scripts/convert_to_tarred_audio_dataset.py b/scripts/convert_to_tarred_audio_dataset.py new file mode 100644 index 000000000000..46f8036908ce --- /dev/null +++ b/scripts/convert_to_tarred_audio_dataset.py @@ -0,0 +1,117 @@ +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# This script converts an existing audio dataset with a manifest to +# a tarred and sharded audio dataset that can be read by the +# TarredAudioToTextDataLayer. + +import argparse +import json +import os +import random +import tarfile + +parser = argparse.ArgumentParser( + description="Convert an existing ASR dataset to tarballs compatible with TarredAudioToTextDataLayer." +) +parser.add_argument( + "--manifest_path", default=None, type=str, required=True, help="Path to the existing dataset's manifest." +) + +# Optional arguments +parser.add_argument( + "--target_dir", + default='./tarred', + type=str, + help="Target directory for resulting tarballs and manifest. Defaults to `./tarred`. Creates the path if ncessary.", +) +parser.add_argument( + "--num_shards", + default=1, + type=int, + help="Number of shards (tarballs) to create. Used for partitioning data among workers.", +) +parser.add_argument( + "--shuffle", + action='store_true', + help="Whether or not to randomly shuffle the samples in the manifest before tarring/sharding.", +) +args = parser.parse_args() + + +def create_shard(entries, target_dir, new_entries, shard_id): + """Creates a tarball containing the audio files from `entries`. + """ + tar = tarfile.open(os.path.join(target_dir, f'audio_{shard_id}.tar'), mode='w') + + for entry in entries: + # We squash the filename since we do not preserve directory structure of audio files in the tarball. + base, ext = os.path.splitext(entry['audio_filepath']) + base = base.replace('/', '_') + # Need the following replacement as long as WebDataset splits on first period + base = base.replace('.', '_') + squashed_filename = f'{base}{ext}' + tar.add(entry['audio_filepath'], arcname=squashed_filename) + + new_entry = { + 'audio_filepath': squashed_filename, + 'duration': entry['duration'], + 'text': entry['text'], + 'shard_id': shard_id, # Keep shard ID for recordkeeping + } + new_entries.append(new_entry) + + tar.close() + + +def main(): + manifest_path = args.manifest_path + target_dir = args.target_dir + num_shards = args.num_shards + shuffle = args.shuffle + + if not os.path.exists(target_dir): + os.makedirs(target_dir) + + # Read the existing manifest + entries = [] + with open(manifest_path, 'r') as m: + for line in m: + entries.append(json.loads(line)) + + if shuffle: + print("Shuffling...") + random.shuffle(entries) + + # Create shards and updated manifest entries + new_entries = [] + for i in range(num_shards): + start_idx = (len(entries) // num_shards) * i + end_idx = start_idx + (len(entries) // num_shards) + if i == num_shards - 1: + end_idx = len(entries) # Last shard gets the leftovers. + print(f"Shard {i} will have {end_idx - start_idx} entries.") + + create_shard(entries[start_idx:end_idx], target_dir, new_entries, i) + + # Write manifest + new_manifest_path = os.path.join(target_dir, 'tarred_audio_manifest.json') + with open(new_manifest_path, 'w') as m2: + for entry in new_entries: + json.dump(entry, m2) + m2.write('\n') + + +if __name__ == "__main__": + main() diff --git a/scripts/convert_wav_to_g711wav.py b/scripts/convert_wav_to_g711wav.py new file mode 100644 index 000000000000..f882e5fc64cc --- /dev/null +++ b/scripts/convert_wav_to_g711wav.py @@ -0,0 +1,93 @@ +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# USAGE: +# python convert_wav_to_g711wav.py \ +# --data_dir= \ +# --dest_dir= +# +# Converts all wav audio files to PCM u-law wav files (8kHz, 8-bit). +# Requires sox to be installed. +import argparse +import concurrent.futures +import glob +import logging +import os +import subprocess + +from tqdm import tqdm + +parser = argparse.ArgumentParser(description='Convert wav audio to pcm mulaw wav') +parser.add_argument( + "--data_dir", default=None, type=str, required=True, help="The path to the input directory with .wav files.", +) +parser.add_argument( + "--dest_dir", default=None, type=str, required=True, help="Path to the destination directory.", +) +args = parser.parse_args() + + +def __convert_audio(in_path, out_path): + """ + Helper function that's called per thread, converts wav to G.711 wav. + Args: + in_path: source wav file to convert + out_path: destination for G.711 wav file + """ + cmd = ["sox", in_path, "-r", "8000", "-c", "1", "-e", "u-law", out_path] + subprocess.run(cmd) + + +def __process_set(data_dir, dst_root): + """ + Finds and converts all wav audio files in the given directory to pcm_mulaw. + Args: + data_dir: source directory with wav files to convert + dst_root: where G.711 (pcm_mulaw) wav files will be stored + """ + wav_list = glob.glob(data_dir) + + if not os.path.exists(dst_root): + os.makedirs(dst_root) + + # Set up and execute concurrent audio conversion + tp = concurrent.futures.ProcessPoolExecutor(max_workers=64) + futures = [] + + for wav_path in tqdm(wav_list, desc="Submitting wav futures", unit="file"): + audio_id = os.path.basename(wav_path) + out_path = os.path.join(dst_root, audio_id) + futures.append(tp.submit(__convert_audio, wav_path, out_path)) + + pbar = tqdm(total=len(wav_list), desc="Converting wav files", unit="file") + count = 0 + for f in concurrent.futures.as_completed(futures): + count += 1 + pbar.update() + tp.shutdown() + pbar.close() + + +def main(): + data_dir = args.data_dir + dest_dir = args.dest_dir + + logging.info("\n\nConverting audio in {}", data_dir) + __process_set( + os.path.join(data_dir, "*.wav",), os.path.join(dest_dir), + ) + + +if __name__ == '__main__': + main() diff --git a/scripts/export_bert_to_trt.py b/scripts/export_bert_to_trt.py index 566446a6f2aa..230fd779ab80 100644 --- a/scripts/export_bert_to_trt.py +++ b/scripts/export_bert_to_trt.py @@ -16,33 +16,40 @@ import argparse import ctypes import json +import os +import os.path import re +import sys +import time import numpy as np import tensorrt as trt -import torch -from nemo import logging +# from helpers.calibrator import BertCalibrator as BertCalibrator -nvinfer = ctypes.CDLL("libnvinfer_plugin.so", mode=ctypes.RTLD_GLOBAL) -cm = ctypes.CDLL("libcommon.so", mode=ctypes.RTLD_GLOBAL) -pg = ctypes.CDLL("libbert_plugins.so", mode=ctypes.RTLD_GLOBAL) +try: + import torch +except ImportError as err: + sys.stderr.write("""Error: Failed to import tensorflow module ({})\n""".format(err)) + sys.exit() """ TensorRT Initialization """ -TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE) +TRT_LOGGER = trt.Logger(trt.Logger.INFO) + +handle = 
ctypes.CDLL("libnvinfer_plugin.so", mode=ctypes.RTLD_GLOBAL) +if not handle: + raise RuntimeError("Could not load plugin library. Is `libnvinfer_plugin.so` on your LD_LIBRARY_PATH?") + trt.init_libnvinfer_plugins(TRT_LOGGER, "") plg_registry = trt.get_plugin_registry() qkv2_plg_creator = plg_registry.get_plugin_creator("CustomQKVToContextPluginDynamic", "1", "") skln_plg_creator = plg_registry.get_plugin_creator("CustomSkipLayerNormPluginDynamic", "1", "") gelu_plg_creator = plg_registry.get_plugin_creator("CustomGeluPluginDynamic", "1", "") emln_plg_creator = plg_registry.get_plugin_creator("CustomEmbLayerNormPluginDynamic", "1", "") +fc_plg_creator = plg_registry.get_plugin_creator("CustomFCPluginDynamic", "1", "") -logging.info( - "creators:", plg_registry, qkv2_plg_creator, skln_plg_creator, gelu_plg_creator, emln_plg_creator, -) -logging.info("\n".join([x.name for x in plg_registry.plugin_creator_list])) """ Attentions Keys @@ -56,6 +63,7 @@ WQKV = "qkv_weight" BQKV = "qkv_bias" + """ Transformer Keys """ @@ -70,31 +78,34 @@ LOUT_LN_BETA = "output_layernorm_bias" LOUT_LN_GAMMA = "output_layernorm_weight" -# Pooler Keys -POOL_W = "pooler_dense_weight" -POOL_B = "pooler_dense_bias" -# classifier Output Keys +""" +Squad Output Keys +""" SQD_W = "weight" SQD_B = "bias" class BertConfig: - def __init__(self, bert_config_path): + def __init__(self, bert_config_path, use_fp16, use_int8, use_strict, use_fc2_gemm): with open(bert_config_path, 'r') as f: data = json.load(f) self.num_attention_heads = data['num_attention_heads'] self.hidden_size = data['hidden_size'] self.intermediate_size = data['intermediate_size'] self.num_hidden_layers = data['num_hidden_layers'] - self.use_fp16 = True + self.use_fp16 = use_fp16 + self.use_int8 = use_int8 + self.use_fc2_gemm = use_fc2_gemm + self.use_strict = use_strict + self.head_size = self.hidden_size // self.num_attention_heads def set_tensor_name(tensor, prefix, name): tensor.name = prefix + name -def set_layer_name(layer, prefix, name, out_idx=0): +def set_output_name(layer, prefix, name, out_idx=0): set_tensor_name(layer.get_output(out_idx), prefix, name) @@ -105,31 +116,37 @@ def attention_layer_opt(prefix, config, init_dict, network, input_tensor, imask) assert len(input_tensor.shape) == 5 B, S, hidden_size, _, _ = input_tensor.shape num_heads = config.num_attention_heads - head_size = int(hidden_size / num_heads) Wall = init_dict[prefix + WQKV] Ball = init_dict[prefix + BQKV] - mult_all = network.add_fully_connected(input_tensor, 3 * hidden_size, Wall, Ball) - set_layer_name(mult_all, prefix, "qkv_mult") + # FC_attention + if config.use_int8: + mult_all = network.add_convolution(input_tensor, 3 * hidden_size, (1, 1), Wall, Ball) + else: + mult_all = network.add_fully_connected(input_tensor, 3 * hidden_size, Wall, Ball) + + set_output_name(mult_all, prefix, "qkv_mult") has_mask = imask is not None - pf_hidden_size = trt.PluginField("hidden_size", np.array([hidden_size], np.int32), trt.PluginFieldType.INT32,) + pf_type = trt.PluginField("type_id", np.array([1 if config.use_fp16 else 0], np.int32), trt.PluginFieldType.INT32) + pf_hidden_size = trt.PluginField("hidden_size", np.array([hidden_size], np.int32), trt.PluginFieldType.INT32) pf_num_heads = trt.PluginField("num_heads", np.array([num_heads], np.int32), trt.PluginFieldType.INT32) - pf_S = trt.PluginField("S", np.array([S], np.int32), trt.PluginFieldType.INT32) pf_has_mask = trt.PluginField("has_mask", np.array([has_mask], np.int32), trt.PluginFieldType.INT32) - pfc = 
trt.PluginFieldCollection([pf_hidden_size, pf_num_heads, pf_S, pf_has_mask]) + pfc = trt.PluginFieldCollection([pf_hidden_size, pf_num_heads, pf_has_mask, pf_type]) qkv2ctx_plug = qkv2_plg_creator.create_plugin("qkv2ctx", pfc) - qkv_in = [mult_all.get_output(0), imask] + qkv_in = [mult_all.get_output(0)] + if has_mask: + qkv_in.append(imask) qkv2ctx = network.add_plugin_v2(qkv_in, qkv2ctx_plug) - set_layer_name(qkv2ctx, prefix, "context_layer") + set_output_name(qkv2ctx, prefix, "context_layer") return qkv2ctx -def skipln(prefix, init_dict, network, input_tensor, skip): +def skipln(prefix, config, init_dict, network, input_tensor, skip, bias=None): """ Add the skip layer """ @@ -142,16 +159,33 @@ def skipln(prefix, init_dict, network, input_tensor, skip): pf_beta = trt.PluginField("beta", wbeta.numpy(), trt.PluginFieldType.FLOAT32) wgamma = init_dict[prefix + "weight"] pf_gamma = trt.PluginField("gamma", wgamma.numpy(), trt.PluginFieldType.FLOAT32) + pf_type = trt.PluginField("type_id", np.array([1 if config.use_fp16 else 0], np.int32), trt.PluginFieldType.INT32) - pfc = trt.PluginFieldCollection([pf_ld, pf_beta, pf_gamma]) + fields = [pf_ld, pf_beta, pf_gamma, pf_type] + + if bias: + pf_bias = trt.PluginField("bias", bias.numpy(), trt.PluginFieldType.FLOAT32) + fields.append(pf_bias) + + pfc = trt.PluginFieldCollection(fields) skipln_plug = skln_plg_creator.create_plugin("skipln", pfc) skipln_inputs = [input_tensor, skip] layer = network.add_plugin_v2(skipln_inputs, skipln_plug) - layer.name = prefix + 'skiplayer' return layer +def my_fc(config, network, input_tensor, out_dims, W): + pf_out_dims = trt.PluginField('out_dims', np.array([out_dims], dtype=np.int32), trt.PluginFieldType.INT32) + pf_W = trt.PluginField('W', W.numpy(), trt.PluginFieldType.FLOAT32) + pf_type = trt.PluginField("type_id", np.array([1 if config.use_fp16 else 0], np.int32), trt.PluginFieldType.INT32) + pfc = trt.PluginFieldCollection([pf_out_dims, pf_W, pf_type]) + fc_plugin = fc_plg_creator.create_plugin('fcplugin', pfc) + plug_inputs = [input_tensor] + out_dense = network.add_plugin_v2(plug_inputs, fc_plugin) + return out_dense + + def transformer_layer_opt(prefix, config, init_dict, network, input_tensor, imask): """ Add the transformer layer @@ -161,40 +195,98 @@ def transformer_layer_opt(prefix, config, init_dict, network, input_tensor, imas hidden_size = idims[2] context_transposed = attention_layer_opt( - prefix + "attention_self_", config, init_dict, network, input_tensor, imask, + prefix + "attention_self_", config, init_dict, network, input_tensor, imask ) attention_heads = context_transposed.get_output(0) - W_aout = init_dict[prefix + W_AOUT] + # FC0 B_aout = init_dict[prefix + B_AOUT] - attention_out_fc = network.add_fully_connected(attention_heads, hidden_size, W_aout, B_aout) + if config.use_int8: + W_aout = init_dict[prefix + W_AOUT] + attention_out_fc = network.add_convolution(attention_heads, hidden_size, (1, 1), W_aout, B_aout) + B_aout = None + + if config.use_fp16: + attention_out_fc.precision = trt.DataType.INT8 + attention_out_fc.set_output_type(0, trt.DataType.HALF) + else: + W_aoutT = init_dict[prefix + W_AOUT + '_trans'] + attention_out_fc = my_fc(config, network, attention_heads, hidden_size, W_aoutT) skiplayer = skipln( - prefix + "attention_output_layernorm_", init_dict, network, attention_out_fc.get_output(0), input_tensor, + prefix + "attention_output_layernorm_", + config, + init_dict, + network, + attention_out_fc.get_output(0), + input_tensor, + B_aout, ) attention_ln = 
skiplayer.get_output(0) - W_mid = init_dict[prefix + W_MID] + # FC1 + GELU B_mid = init_dict[prefix + B_MID] - mid_dense = network.add_fully_connected(attention_ln, config.intermediate_size, W_mid, B_mid) + W_mid = init_dict[prefix + W_MID] + if config.use_int8: + mid_dense = network.add_convolution(attention_ln, config.intermediate_size, (1, 1), W_mid, B_mid) + else: + mid_dense = network.add_fully_connected(attention_ln, config.intermediate_size, W_mid, B_mid) mid_dense_out = mid_dense.get_output(0) - - pfc = trt.PluginFieldCollection() - plug = gelu_plg_creator.create_plugin("gelu", pfc) - - gelu_layer = network.add_plugin_v2([mid_dense_out], plug) + POW = network.add_constant((1, 1, 1, 1, 1), trt.Weights(np.ascontiguousarray([3.0], dtype=np.float32))) + MULTIPLY = network.add_constant((1, 1, 1, 1, 1), trt.Weights(np.ascontiguousarray([0.044715], dtype=np.float32))) + SQRT = network.add_constant( + (1, 1, 1, 1, 1), trt.Weights((np.ascontiguousarray([0.79788456080286535587989211986876], dtype=np.float32))) + ) + ONE = network.add_constant((1, 1, 1, 1, 1), trt.Weights((np.ascontiguousarray([1.0], dtype=np.float32)))) + HALF = network.add_constant((1, 1, 1, 1, 1), trt.Weights((np.ascontiguousarray([0.5], dtype=np.float32)))) + X_pow = network.add_elementwise(mid_dense_out, POW.get_output(0), trt.ElementWiseOperation.POW) + X_pow_t = X_pow.get_output(0) + X_mul = network.add_elementwise(X_pow_t, MULTIPLY.get_output(0), trt.ElementWiseOperation.PROD) + X_add = network.add_elementwise(mid_dense_out, X_mul.get_output(0), trt.ElementWiseOperation.SUM) + X_sqrt = network.add_elementwise(X_add.get_output(0), SQRT.get_output(0), trt.ElementWiseOperation.PROD) + X_sqrt_tensor = X_sqrt.get_output(0) + X_tanh = network.add_activation(X_sqrt_tensor, trt.ActivationType.TANH) + X_tanh_tensor = X_tanh.get_output(0) + X_one = network.add_elementwise(X_tanh_tensor, ONE.get_output(0), trt.ElementWiseOperation.SUM) + CDF = network.add_elementwise(X_one.get_output(0), HALF.get_output(0), trt.ElementWiseOperation.PROD) + gelu_layer = network.add_elementwise(CDF.get_output(0), mid_dense_out, trt.ElementWiseOperation.PROD) + + # enable elementwise fusing for int8 && fp16 + POW.precision = trt.DataType.FLOAT + MULTIPLY.precision = trt.DataType.FLOAT + SQRT.precision = trt.DataType.FLOAT + ONE.precision = trt.DataType.FLOAT + HALF.precision = trt.DataType.FLOAT + X_pow.precision = trt.DataType.FLOAT + X_mul.precision = trt.DataType.FLOAT + X_add.precision = trt.DataType.FLOAT + X_sqrt.precision = trt.DataType.FLOAT + X_tanh.precision = trt.DataType.FLOAT + X_one.precision = trt.DataType.FLOAT + CDF.precision = trt.DataType.FLOAT + gelu_layer.precision = trt.DataType.FLOAT intermediate_act = gelu_layer.get_output(0) set_tensor_name(intermediate_act, prefix, "gelu") + if config.use_int8 and config.use_strict: + intermediate_act.set_dynamic_range(-10, 10) + # FC2 # Dense to hidden size - W_lout = init_dict[prefix + W_LOUT] B_lout = init_dict[prefix + B_LOUT] - - out_dense = network.add_fully_connected(intermediate_act, hidden_size, W_lout, B_lout) - set_layer_name(out_dense, prefix + "output_", "dense") - out_layer = skipln(prefix + "output_layernorm_", init_dict, network, out_dense.get_output(0), attention_ln,) + if config.use_int8 and config.use_strict and not config.use_fc2_gemm: + W_lout = init_dict[prefix + W_LOUT] + out_dense = network.add_convolution(intermediate_act, hidden_size, (1, 1), W_lout, B_lout) + B_lout = None + else: + W_loutT = init_dict[prefix + W_LOUT + '_trans'] + out_dense = my_fc(config, 
network, intermediate_act, hidden_size, W_loutT) + + set_output_name(out_dense, prefix + "output_", "dense") + out_layer = skipln( + prefix + "output_layernorm_", config, init_dict, network, out_dense.get_output(0), attention_ln, B_lout + ) out_ln = out_layer.get_output(0) set_tensor_name(out_ln, prefix + "output_", "reshape") @@ -213,60 +305,31 @@ def bert_model(config, init_dict, network, input_tensor, input_mask): return prev_input -# first_token_tensor = hidden_states[:, 0] -# pooled_output = self.dense(first_token_tensor), nn.Linear( -# config.hidden_size, config.hidden_size) -# pooled_output = self.activation(pooled_output), nn.tanh - - -def bert_pooler(prefix, init_dict, network, input_tensor): - """ - pooler the bert output - """ - seq_len = input_tensor.shape[0] - hidden_size = input_tensor.shape[1] - - shuf = network.add_shuffle(input_tensor) - shuf.first_transpose = (2, 3, 0, 1) - - first_token_tensor = network.add_slice( - shuf.get_output(0), start=(0, 0, 0, 0), shape=(1, 1, 1, hidden_size), stride=(1, 1, 1, 1), - ) - - W_out = init_dict[prefix + POOL_W] - B_out = init_dict[prefix + POOL_B] - pooler = network.add_fully_connected(first_token_tensor.get_output(0), hidden_size, W_out, B_out) - - pooler = network.add_activation(pooler.get_output(0), trt.ActivationType.TANH) - set_layer_name(pooler, prefix, "pooler") - - return pooler.get_output(0) - - -def squad_output(prefix, init_dict, network, input_tensor): +def squad_output(prefix, config, init_dict, network, input_tensor): """ Create the squad output """ idims = input_tensor.shape assert len(idims) == 5 + B, S, hidden_size, _, _ = idims W_out = init_dict[prefix + SQD_W] B_out = init_dict[prefix + SQD_B] dense = network.add_fully_connected(input_tensor, 2, W_out, B_out) - set_layer_name(dense, prefix, "dense") - return dense + + OUT = network.add_shuffle(dense.get_output(0)) + OUT.second_transpose = (1, 0, 2, 3, 4) + return OUT def sequence_class_output(prefix, init_dict, network, input_tensor, softmax=True): - logging.info(input_tensor.shape) - seq_len = input_tensor.shape[1] + # (seq_len, batch, hidden size, 1, 1) hidden_size = input_tensor.shape[2] shuf = network.add_shuffle(input_tensor) - shuf.first_transpose = (0, 3, 4, 1, 2) - logging.info("seq class in: ", shuf.get_output(0).shape) + shuf.first_transpose = (1, 3, 4, 0, 2) # target = (batch, 1, 1, seq_len, hidden_size) in_shape_tensor = network.add_shape(shuf.get_output(0)).get_output(0) out_shape_tensor = network.add_gather( @@ -295,10 +358,9 @@ def sequence_class_output(prefix, init_dict, network, input_tensor, softmax=True probs.axes = 4 # last dimension classifier = probs classifier = network.add_shuffle(classifier.get_output(0)) - classifier.reshape_dims = trt.Dims([0, -1]) + classifier.reshape_dims = trt.Dims([0, W_out.shape[0]]) - set_layer_name(classifier, prefix, "classifier") - logging.info("seq class: ", classifier.get_output(0).shape) + set_output_name(classifier, prefix, "classifier") return classifier @@ -315,15 +377,14 @@ def token_class_output(prefix, init_dict, network, input_tensor, softmax=True): probs = network.add_softmax(classifier.get_output(0)) probs.axes = 4 # last dimension classifier = probs - set_layer_name(classifier, prefix, "classifier") + set_output_name(classifier, prefix, "classifier") classifier = network.add_shuffle(classifier.get_output(0)) classifier.reshape_dims = trt.Dims([0, 0, 0]) - - logging.info("tok class: ", classifier.get_output(0).shape) + classifier.second_transpose = (1, 0, 2, 3, 4) return classifier -def 
load_weights(inputbase): +def load_weights(inputbase, config): """ Load the weights from the tensorflow checkpoint """ @@ -337,7 +398,7 @@ def load_weights(inputbase): # can be discarded param_names = [key for key in sorted(tensor_dict) if 'adam' not in key and 'global_step' not in key] count = len(param_names) - TRT_LOGGER.log(TRT_LOGGER.INFO, str(count)) + TRT_LOGGER.log(TRT_LOGGER.INFO, f"Loading/transforming {str(count)} weights") for pn in param_names: toks = pn.lower().split('.') @@ -348,17 +409,24 @@ def load_weights(inputbase): else: outname = '_'.join(toks) - # convert torch tensor to numpy tensor = tensor_dict[pn].numpy() + if pn.find('weight') != -1: + weights_dict[outname + '_trans'] = trt.Weights(np.ascontiguousarray(np.transpose(tensor)).flatten()) + TRT_LOGGER.log(TRT_LOGGER.VERBOSE, "Transposing {}\n".format(pn)) + + # convert torch tensor to numpy shape = tensor.shape flat_tensor = tensor.flatten() shape_str = '{} '.format(len(shape)) + ' '.join([str(d) for d in shape]) weights_dict[outname] = trt.Weights(flat_tensor) TRT_LOGGER.log( - TRT_LOGGER.INFO, "Orig.name: {:}, TRT name: {:}, shape: {:}".format(pn, outname, shape_str), + TRT_LOGGER.VERBOSE, "Orig.name: {:}, TRT name: {:}, shape: {:}".format(pn, outname, shape_str), ) + N = config.num_attention_heads + H = config.head_size + additional_dict = dict() for key, value in weights_dict.items(): pos = key.find(BQ) @@ -385,6 +453,9 @@ def load_weights(inputbase): Ball[hidden_size : 2 * hidden_size] = Bk_.numpy()[0:hidden_size] Ball[2 * hidden_size : 3 * hidden_size] = Bv_.numpy()[0:hidden_size] + Wall = np.ascontiguousarray(Wall.reshape((3, N, H, N, H)).transpose((1, 0, 2, 3, 4)), dtype=np.float32) + Ball = np.ascontiguousarray(Ball.reshape((3, N, H)).transpose((1, 0, 2)), dtype=np.float32) + additional_dict[prefix + WQKV] = trt.Weights(Wall) additional_dict[prefix + BQKV] = trt.Weights(Ball) @@ -395,174 +466,303 @@ def load_weights(inputbase): return weights_dict -def main( - bert_weight_path, - class_weight_path, - B, - S, - config_path, - outputbase, - min_batch=None, - max_batch=None, - seq_class_prefix=None, - tok_class_prefix=None, +def emb_layernorm( + builder, + network, + config, + weights_dict, + builder_config, + sequence_length, + batch_size, + min_batch_size=None, + max_batch_size=None, ): - bert_config_path = config_path - TRT_LOGGER.log(TRT_LOGGER.INFO, bert_config_path) - config = BertConfig(bert_config_path) - - # Load weights from checkpoint file - init_dict = load_weights(bert_weight_path) - classifiers_dict = {k: v.numpy() for k, v in torch.load(class_weight_path, map_location='cpu').items()} - - # import pdb;pdb.set_trace() - with trt.Builder(TRT_LOGGER) as builder: - ty = trt.PluginFieldType.FLOAT32 - - # import pdb;pdb.set_trace() - w = init_dict["bert_embeddings_layernorm_bias"] - wbeta = trt.PluginField("bert_embeddings_layernorm_beta", w.numpy(), ty) - - w = init_dict["bert_embeddings_layernorm_weight"] - wgamma = trt.PluginField("bert_embeddings_layernorm_gamma", w.numpy(), ty) - - w = init_dict["bert_embeddings_word_embeddings_weight"] - wwordemb = trt.PluginField("bert_embeddings_word_embeddings", w.numpy(), ty) - - w = init_dict["bert_embeddings_token_type_embeddings_weight"] - wtokemb = trt.PluginField("bert_embeddings_token_type_embeddings", w.numpy(), ty) + input_ids = network.add_input(name="input_ids", dtype=trt.int32, shape=(-1, sequence_length)) + segment_ids = network.add_input(name="segment_ids", dtype=trt.int32, shape=(-1, sequence_length)) + input_mask = 
network.add_input(name="input_mask", dtype=trt.int32, shape=(-1, sequence_length)) + + profile = builder.create_optimization_profile() + min_shape = (min_batch_size or batch_size, sequence_length) + shape = (batch_size, sequence_length) + max_shape = (max_batch_size or batch_size, sequence_length) + profile.set_shape("input_ids", min=min_shape, opt=shape, max=max_shape) + profile.set_shape("segment_ids", min=min_shape, opt=shape, max=max_shape) + profile.set_shape("input_mask", min=min_shape, opt=shape, max=max_shape) + builder_config.add_optimization_profile(profile) + + input_ids_t = network.add_shuffle(input_ids) + input_ids_t.second_transpose = (1, 0) + segment_ids_t = network.add_shuffle(segment_ids) + segment_ids_t.second_transpose = (1, 0) + input_mask_t = network.add_shuffle(input_mask) + input_mask_t.second_transpose = (1, 0) + + wbeta = trt.PluginField( + "bert_embeddings_layernorm_beta", + weights_dict["bert_embeddings_layernorm_bias"].numpy(), + trt.PluginFieldType.FLOAT32, + ) + wgamma = trt.PluginField( + "bert_embeddings_layernorm_gamma", + weights_dict["bert_embeddings_layernorm_weight"].numpy(), + trt.PluginFieldType.FLOAT32, + ) + wwordemb = trt.PluginField( + "bert_embeddings_word_embeddings", + weights_dict["bert_embeddings_word_embeddings_weight"].numpy(), + trt.PluginFieldType.FLOAT32, + ) + wtokemb = trt.PluginField( + "bert_embeddings_token_type_embeddings", + weights_dict["bert_embeddings_token_type_embeddings_weight"].numpy(), + trt.PluginFieldType.FLOAT32, + ) + wposemb = trt.PluginField( + "bert_embeddings_position_embeddings", + weights_dict["bert_embeddings_position_embeddings_weight"].numpy(), + trt.PluginFieldType.FLOAT32, + ) - w = init_dict["bert_embeddings_position_embeddings_weight"] - wposemb = trt.PluginField("bert_embeddings_position_embeddings", w.numpy(), ty) + output_fp16 = trt.PluginField( + "output_fp16", np.array([1 if config.use_fp16 else 0]).astype(np.int32), trt.PluginFieldType.INT32 + ) - pfc = trt.PluginFieldCollection([wbeta, wgamma, wwordemb, wtokemb, wposemb]) - fn = emln_plg_creator.create_plugin("embeddings", pfc) + pfc = trt.PluginFieldCollection([wbeta, wgamma, wwordemb, wtokemb, wposemb, output_fp16]) + fn = emln_plg_creator.create_plugin("embeddings", pfc) + + inputs = [input_ids_t.get_output(0), segment_ids_t.get_output(0), input_mask_t.get_output(0)] + emb_layer = network.add_plugin_v2(inputs, fn) + set_output_name(emb_layer, "embeddings_", "output") + return emb_layer + + +def build_engine( + batch_size, + sequence_length, + config, + weights_dict, + classifiers_dict, + squad_json, + vocab_file, + calibrationCacheFile, + calib_num, + tok_class_prefix=None, + seq_class_prefix=None, + qa_prefix=None, + min_batch_size=None, + max_batch_size=None, +): + explicit_batch_flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - explicit_batch_flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) - with builder.create_network(explicit_batch_flag) as network, builder.create_builder_config() as builder_config: - builder_config.max_workspace_size = 5000 * (1024 * 1024) # 5000 MiB + with trt.Builder(TRT_LOGGER) as builder, builder.create_network( + explicit_batch_flag + ) as network, builder.create_builder_config() as builder_config: + builder_config.max_workspace_size = 5000 * (1024 * 1024) # 5000 MiB + if config.use_fp16: builder_config.set_flag(trt.BuilderFlag.FP16) - - input_ids = network.add_input(name="input_ids", dtype=trt.int32, shape=(-1, S,)) - segment_ids = network.add_input(name="segment_ids", 
dtype=trt.int32, shape=(-1, S,)) - input_mask = network.add_input(name="input_mask", dtype=trt.int32, shape=(-1, S,)) - - def set_profile_shape(profile, batch_size, min_batch=None, max_batch=None): - opt_shape = (batch_size, S) - min_shape = (min_batch or batch_size, S) - max_shape = (max_batch or batch_size, S) - profile.set_shape("input_ids", min=min_shape, opt=opt_shape, max=max_shape) - profile.set_shape("segment_ids", min=min_shape, opt=opt_shape, max=max_shape) - profile.set_shape("input_mask", min=min_shape, opt=opt_shape, max=max_shape) - - # Specify only a single profile for now, even though this is - # less optimal - bs1_profile = builder.create_optimization_profile() - set_profile_shape(bs1_profile, B, min_batch=min_batch, max_batch=max_batch) - builder_config.add_optimization_profile(bs1_profile) - - inputs = [input_ids, segment_ids, input_mask] - emb_layer = network.add_plugin_v2(inputs, fn) - - embeddings = emb_layer.get_output(0) - mask_idx = emb_layer.get_output(1) - - bert_out = bert_model(config, init_dict, network, embeddings, mask_idx) - - if tok_class_prefix is not None: - token_class = token_class_output(tok_class_prefix, classifiers_dict, network, bert_out) - token_class_logits_out = token_class.get_output(0) - token_class_logits_out.name = "token_logits" - token_class_logits_out.dtype = trt.DataType.FLOAT - network.mark_output(token_class_logits_out) - - if seq_class_prefix is not None: - seq_class = sequence_class_output(seq_class_prefix, classifiers_dict, network, bert_out) - seq_class_logits_out = seq_class.get_output(0) - seq_class_logits_out.name = "seq_logits" - seq_class_logits_out.dtype = trt.DataType.FLOAT - network.mark_output(seq_class_logits_out) - - with builder.build_engine(network, builder_config) as engine: - TRT_LOGGER.log(TRT_LOGGER.VERBOSE, "Serializing Engine...") - serialized_engine = engine.serialize() - TRT_LOGGER.log(TRT_LOGGER.INFO, "Saving Engine to {:}".format(outputbase)) - with open(outputbase, 'wb') as fout: - fout.write(serialized_engine) - TRT_LOGGER.log(TRT_LOGGER.INFO, "Done.") - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description='TensorRT BERT Sample') + if config.use_int8: + calibrator = BertCalibrator(squad_json, vocab_file, calibrationCacheFile, 1, sequence_length, calib_num) + builder_config.set_flag(trt.BuilderFlag.INT8) + builder_config.int8_calibrator = calibrator + if config.use_strict: + builder_config.set_flag(trt.BuilderFlag.STRICT_TYPES) + + # Create the network + emb_layer = emb_layernorm( + builder, + network, + config, + weights_dict, + builder_config, + sequence_length, + batch_size, + min_batch_size, + max_batch_size, + ) + embeddings = emb_layer.get_output(0) + mask_idx = emb_layer.get_output(1) + + bert_out = bert_model(config, weights_dict, network, embeddings, mask_idx) + if not classifiers_dict: + classifiers_dict = weights_dict + + if tok_class_prefix is not None: + TRT_LOGGER.log(TRT_LOGGER.INFO, f"Configuring head for token classification: {tok_class_prefix}") + token_class = token_class_output(tok_class_prefix, classifiers_dict, network, bert_out) + token_class_logits_out = token_class.get_output(0) + network.mark_output(token_class_logits_out) + token_class_logits_out.name = "token_logits" + token_class_logits_out.dtype = trt.DataType.FLOAT + + if seq_class_prefix is not None: + TRT_LOGGER.log(TRT_LOGGER.INFO, f"Configuring head for sequence classification: {seq_class_prefix}") + seq_class = sequence_class_output(seq_class_prefix, classifiers_dict, network, bert_out) + 
seq_class_logits_out = seq_class.get_output(0) + network.mark_output(seq_class_logits_out) + seq_class_logits_out.name = "seq_logits" + seq_class_logits_out.dtype = trt.DataType.FLOAT + + if qa_prefix is not None: + TRT_LOGGER.log(TRT_LOGGER.INFO, f"Configuring head for question answering: {qa_prefix}") + qa_logits = squad_output(qa_prefix, config, classifiers_dict, network, bert_out) + qa_logits_out = qa_logits.get_output(0) + network.mark_output(qa_logits_out) + qa_logits_out.name = "qa_logits" + qa_logits_out.dtype = trt.DataType.FLOAT + + build_start_time = time.time() + TRT_LOGGER.log(TRT_LOGGER.INFO, f"Starting engine build") + engine = builder.build_engine(network, builder_config) + build_time_elapsed = time.time() - build_start_time + TRT_LOGGER.log(TRT_LOGGER.INFO, "Built engine in {:.3f} Sec".format(build_time_elapsed)) + if config.use_int8: + calibrator.free() + return engine + + +def generate_calibration_cache(sequence_length, config, weights_dict, squad_json, vocab_file, calib_num): + # dynamic shape not working with calibration, so we need generate a calibration cache first using fulldims network + calibrationCacheFile = "bertSquadCalibCache" + if not config.use_int8 or os.path.exists(calibrationCacheFile): + return calibrationCacheFile + + # generate calibration cache + saved_use_fp16 = config.use_fp16 + config.use_fp16 = False + + # with build_engine([1], sequence_length, config, weights_dict, squad_json, vocab_file, calibrationCacheFile, calib_num) as engine: + # TRT_LOGGER.log(TRT_LOGGER.INFO, "calibration cache generated in {:}".format(calibrationCacheFile)) + + config.use_fp16 = saved_use_fp16 + return calibrationCacheFile + + +def main(): + parser = argparse.ArgumentParser( + description='TensorRT BERT Sample', formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) parser.add_argument('-bw', '--bert-weight', required=True, help='bert weight from nemo') parser.add_argument( - '-cw', '--class-weight', required=True, help='classifier weight from nemo', + '-cw', '--class-weight', required=False, default=None, help='classifier weight from nemo', ) - parser.add_argument( '-t', '--token-classifier', required=False, default=None, help="Name of the token classifier", ) parser.add_argument( '-s', '--seq-classifier', required=False, default=None, help="Name of the sequence classifier", ) - parser.add_argument( - '-o', '--output', required=True, help='The bert engine file, ex bert.engine', + '-qa', '--qa', required=False, default=None, help="Name of the Question Answering classifier", ) parser.add_argument( - '-b', '--batch-size', type=int, required=False, default=1, help='Preferred batch size (default = 1)', + '-o', '--output', required=True, help='The bert engine file, ex bert.engine', ) parser.add_argument( - '--max-batch-size', + '--min-batch-size', type=int, required=False, default=None, - help='Maximum batch size (default = same as ' 'batch-size)', + help='Minimum batch size (default = same as ' 'batch-size)', ) parser.add_argument( - '--min-batch-size', + '--max-batch-size', type=int, required=False, default=None, - help='Minimum batch size (default = same as ' 'batch-size)', + help='Maximum batch size (default = same as ' 'batch-size)', ) - parser.add_argument( - '-l', - '--seq-length', + '-b', + '--batch-size', + default=1, type=int, required=False, - default=128, - help='Sequence length of the BERT model (default=128)', + help='Batch size to optimize for. 
The engine will be usable with any batch size below this, but may not be optimal for smaller sizes.', + ) + parser.add_argument('-l', '--sequence-length', default=128, help='Sequence length of the BERT model', type=int) + parser.add_argument( '-c', '--config', required=True, - help='The folder containing the bert_config.json, ' - 'which can be downloaded e.g. from ' - 'https://github.com/google-research/bert#pre' - '-trained-models or by running ' - 'download_models.py in ' - 'dle/TensorFlow/LanguageModeling/BERT/' - 'data/pretrained_models_google', + help='The folder containing the bert_config.json, which can be downloaded e.g. from https://github.com/google-research/bert#pre-trained-models or by running download_models.py in dle/TensorFlow/LanguageModeling/BERT/data/pretrained_models_google', + ) + parser.add_argument( + '-f', + '--no-fp16', + action='store_true', + help='Disable FP16 precision (FP16 is enabled by default)', + required=False, + ) + parser.add_argument( + '-i', + '--int8', + action='store_true', + help='Indicates that inference should be run in INT8 precision', + required=False, + ) + parser.add_argument( + '--strict', + action='store_true', + help='Indicates that inference should be run in strict precision mode', + required=False, + ) + parser.add_argument( + '-j', + '--squad-json', + default='squad/dev-v1.1.json', + help='squad json dataset used for int8 calibration', + required=False, + ) + parser.add_argument( + '-v', + '--vocab-file', + default='./pre-trained_model/uncased_L-24_H-1024_A-16/vocab.txt', + help='Path to file containing entire understandable vocab', + required=False, + ) + parser.add_argument('-n', '--calib-num', default=100, help='Number of calibration batches', type=int) + parser.add_argument( + '-g', '--force-fc2-gemm', action='store_true', help='Force using GEMM to implement the FC2 layer', required=False + ) + + args, _ = parser.parse_known_args() + + TRT_LOGGER.log(TRT_LOGGER.INFO, "Using configuration file: {:}".format(args.config)) + config = BertConfig(args.config, not args.no_fp16, args.int8, args.strict, args.force_fc2_gemm) - opt = parser.parse_args() - - outputbase = opt.output - config_path = opt.config - logging.info("token class:", opt.token_classifier) - logging.info("seq class: ", opt.seq_classifier) - main( - opt.bert_weight, - opt.class_weight, - opt.batch_size, - opt.seq_length, - config_path, - outputbase, - min_batch=opt.min_batch_size, - max_batch=opt.max_batch_size, - tok_class_prefix=opt.token_classifier, - seq_class_prefix=opt.seq_classifier, + weights_dict = load_weights(args.bert_weight, config) + classifiers_dict = None + if args.class_weight: + classifiers_dict = {k: v.numpy() for k, v in torch.load(args.class_weight, map_location='cpu').items()} + + # return + calib_cache = generate_calibration_cache( + args.sequence_length, config, weights_dict, args.squad_json, args.vocab_file, args.calib_num ) + + with build_engine( + args.batch_size, + args.sequence_length, + config, + weights_dict, + classifiers_dict, + args.squad_json, + args.vocab_file, + calib_cache, + args.calib_num, + tok_class_prefix=args.token_classifier, + seq_class_prefix=args.seq_classifier, + qa_prefix=args.qa, + min_batch_size=args.min_batch_size, + max_batch_size=args.max_batch_size, + ) as engine: + TRT_LOGGER.log(TRT_LOGGER.VERBOSE, "Serializing Engine...") + serialized_engine = engine.serialize() + TRT_LOGGER.log(TRT_LOGGER.INFO, "Saving Engine to {:}".format(args.output)) + with open(args.output, 'wb') as fout: + fout.write(serialized_engine) + 
TRT_LOGGER.log(TRT_LOGGER.INFO, "Done.") + + +if __name__ == '__main__': + main() diff --git a/scripts/export_jasper_to_onnx.py b/scripts/export_jasper_to_onnx.py index dbb24023fa2f..6df997e11c86 100644 --- a/scripts/export_jasper_to_onnx.py +++ b/scripts/export_jasper_to_onnx.py @@ -1,13 +1,13 @@ # Copyright (c) 2019 NVIDIA Corporation import argparse +import os import torch from ruamel.yaml import YAML import nemo import nemo.collections.asr as nemo_asr - -logging = nemo.logging +from nemo.utils import logging def get_parser(): @@ -30,6 +30,13 @@ def get_parser(): parser.add_argument( "--pre-v09-model", action="store_true", help="Use if checkpoints were generated from NeMo < v0.9", ) + parser.add_argument( + "--decoder_type", + default='ctc', + type=str, + choices=['ctc', 'classification'], + help="Type of decoder used by the model.", + ) return parser @@ -42,6 +49,7 @@ def main( pre_v09_model=False, batch_size=1, time_steps=256, + decoder_type='ctc', ): yaml = YAML(typ="safe") @@ -54,6 +62,8 @@ def main( num_encoder_input_features = jasper_model_definition['AudioPreprocessing']['features'] elif 'AudioToMelSpectrogramPreprocessor' in jasper_model_definition: num_encoder_input_features = jasper_model_definition['AudioToMelSpectrogramPreprocessor']['features'] + elif 'AudioToMFCCPreprocessor' in jasper_model_definition: + num_encoder_input_features = jasper_model_definition['AudioToMFCCPreprocessor']['n_mfcc'] else: num_encoder_input_features = 64 num_decoder_input_features = jasper_model_definition['JasperEncoder']['jasper'][-1]['filters'] @@ -62,14 +72,30 @@ def main( nf = nemo.core.NeuralModuleFactory(create_tb_writer=False) + # Compatibility for `feat_in` defined in config file + if 'feat_in' in jasper_model_definition['JasperEncoder']: + jasper_model_definition['JasperEncoder'].pop('feat_in') + logging.info("Initializing models...") jasper_encoder = nemo_asr.JasperEncoder( feat_in=num_encoder_input_features, **jasper_model_definition['JasperEncoder'] ) - jasper_decoder = nemo_asr.JasperDecoderForCTC( - feat_in=num_decoder_input_features, num_classes=len(jasper_model_definition['labels']), - ) + if decoder_type == 'ctc': + jasper_decoder = nemo_asr.JasperDecoderForCTC( + feat_in=num_decoder_input_features, num_classes=len(jasper_model_definition['labels']), + ) + elif decoder_type == 'classification': + if 'labels' in jasper_model_definition: + num_classes = len(jasper_model_definition['labels']) + else: + raise ValueError("List of class labels must be defined in model config file with key 'labels'") + + jasper_decoder = nemo_asr.JasperDecoderForClassification( + feat_in=num_decoder_input_features, num_classes=num_classes + ) + else: + raise ValueError("`decoder_type` must be one of ['ctc', 'classification']") # This is necessary if you are using checkpoints trained with NeMo # version before 0.9 @@ -88,6 +114,15 @@ def main( jasper_encoder.restore_from(nn_encoder) jasper_decoder.restore_from(nn_decoder) + # Create export directories if they don't already exist + base_export_dir, export_fn = os.path.split(nn_onnx_encoder) + if not os.path.exists(base_export_dir): + os.makedirs(base_export_dir) + + base_export_dir, export_fn = os.path.split(nn_onnx_decoder) + if not os.path.exists(base_export_dir): + os.makedirs(base_export_dir) + logging.info("Exporting encoder...") nf.deployment_export( jasper_encoder, @@ -114,4 +149,5 @@ def main( args.onnx_encoder, args.onnx_decoder, pre_v09_model=args.pre_v09_model, + decoder_type=args.decoder_type, ) diff --git a/scripts/get_databaker_data.py 
b/scripts/get_databaker_data.py index f60bff264506..a03595bed029 100644 --- a/scripts/get_databaker_data.py +++ b/scripts/get_databaker_data.py @@ -78,7 +78,7 @@ def __extract_rar(rar_path, dest_dir): logging.info("Extraction failed.") exit(1) else: - logging.info("Skipping extracting. Data already there {0}.".format(data_dir)) + logging.info("Skipping extracting. Data already there {0}.".format(dest_dir)) def __convert_waves(wavedir, converted_wavedir, wavename, sr): diff --git a/scripts/get_hi-mia_data.py b/scripts/get_hi-mia_data.py new file mode 100644 index 000000000000..145ab6780e3a --- /dev/null +++ b/scripts/get_hi-mia_data.py @@ -0,0 +1,164 @@ +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# USAGE: python get_hi-mia_data.py --data_root= + +import argparse +import json +import logging +import os +import tarfile +import urllib.request +from glob import glob + +import librosa as l +from sklearn.model_selection import StratifiedShuffleSplit +from tqdm import tqdm + +parser = argparse.ArgumentParser(description='HI-MIA Data download') +parser.add_argument("--data_root", required=True, default=None, type=str) +args = parser.parse_args() + +URL = { + 'dev': "http://www.openslr.org/resources/85/dev.tar.gz", + 'test': "http://www.openslr.org/resources/85/test.tar.gz", + 'train': "http://www.openslr.org/resources/85/train.tar.gz", +} + + +def __maybe_download_file(destination: str, source: str): + """ + Downloads source to destination if it doesn't exist. + If exists, skips download + Args: + destination: local filepath + source: url of resource + + Returns: + + """ + source = URL[source] + if not os.path.exists(destination): + logging.info("{0} does not exist. Downloading ...".format(destination)) + urllib.request.urlretrieve(source, filename=destination + '.tmp') + os.rename(destination + '.tmp', destination) + logging.info("Downloaded {0}.".format(destination)) + else: + logging.info("Destination {0} exists. Skipping.".format(destination)) + return destination + + +def __extract_all_files(filepath: str, data_root: str, data_dir: str): + if not os.path.exists(data_dir): + extract_file(filepath, data_root) + audio_dir = os.path.join(data_dir, 'wav') + for subfolder, _, filelist in os.walk(audio_dir): + for ftar in filelist: + extract_file(os.path.join(subfolder, ftar), subfolder) + else: + logging.info('Skipping extracting. Data already there %s' % data_dir) + + +def extract_file(filepath: str, data_dir: str): + try: + tar = tarfile.open(filepath) + tar.extractall(data_dir) + tar.close() + except Exception: + logging.info('Not extracting. 
Maybe already there?') + + +def write_file(name, lines, idx): + with open(name, 'w') as fout: + for i in idx: + dic = lines[i] + json.dump(dic, fout) + fout.write('\n') + print("wrote", name) + + +def __process_data(data_folder: str, data_set: str): + """ + To generate manifest + Args: + data_folder: source directory with wav files + data_set: name of the data split being processed (train / dev / test) + Returns: + + """ + fullpath = os.path.abspath(data_folder) + scp = glob(fullpath + '/**/*.wav', recursive=True) + out = os.path.join(fullpath, data_set + '_all.json') + utt2spk = os.path.join(fullpath, 'utt2spk') + utt2spk_file = open(utt2spk, 'w') + id = -2 # index of the speaker directory in the file path + + if os.path.exists(out): + os.remove(out) + + speakers = [] + lines = [] + with open(out, 'w') as outfile: + for line in tqdm(scp): + line = line.strip() + y, sr = l.load(line, sr=None) + if sr != 16000: + y, sr = l.load(line, sr=16000) + l.output.write_wav(line, y, sr) + dur = l.get_duration(y=y, sr=sr) + if data_set == 'test': + speaker = line.split('/')[-1].split('.')[0].split('_')[0] + else: + speaker = line.split('/')[id] + speaker = list(speaker) + speaker = ''.join(speaker) + speakers.append(speaker) + meta = {"audio_filepath": line, "duration": float(dur), "label": speaker} + lines.append(meta) + json.dump(meta, outfile) + outfile.write("\n") + utt2spk_file.write(line.split('/')[-1] + "\t" + speaker + "\n") + + utt2spk_file.close() + + if data_set != 'test': + sss = StratifiedShuffleSplit(n_splits=1, test_size=0.1, random_state=42) + for train_idx, test_idx in sss.split(speakers, speakers): + print(len(train_idx)) + + out = os.path.join(fullpath, 'train.json') + write_file(out, lines, train_idx) + out = os.path.join(fullpath, 'dev.json') + write_file(out, lines, test_idx) + + +def main(): + data_root = args.data_root + for data_set in URL.keys(): + + # data_set = 'data_aishell' + logging.info("\n\nWorking on: {0}".format(data_set)) + file_path = os.path.join(data_root, data_set + ".tgz") + logging.info("Getting {0}".format(data_set)) + __maybe_download_file(file_path, data_set) + logging.info("Extracting {0}".format(data_set)) + data_folder = os.path.join(data_root, data_set) + __extract_all_files(file_path, data_root, data_folder) + logging.info("Processing {0}".format(data_set)) + __process_data(data_folder, data_set) + logging.info('Done!') + + +if __name__ == "__main__": + main() diff --git a/scripts/get_librispeech_data.py b/scripts/get_librispeech_data.py index 4975b507c906..61ee8ef29a9a 100755 --- a/scripts/get_librispeech_data.py +++ b/scripts/get_librispeech_data.py @@ -24,9 +24,9 @@ args = parser.parse_args() URLS = { - 'TRAIN_CLEAN_100': ("http://www.openslr.org/resources/12/train-clean-100" ".tar.gz"), - 'TRAIN_CLEAN_360': ("http://www.openslr.org/resources/12/train-clean-360" ".tar.gz"), - 'TRAIN_OTHER_500': ("http://www.openslr.org/resources/12/train-other-500" ".tar.gz"), + 'TRAIN_CLEAN_100': ("http://www.openslr.org/resources/12/train-clean-100.tar.gz"), + 'TRAIN_CLEAN_360': ("http://www.openslr.org/resources/12/train-clean-360.tar.gz"), + 'TRAIN_OTHER_500': ("http://www.openslr.org/resources/12/train-other-500.tar.gz"), 'DEV_CLEAN': "http://www.openslr.org/resources/12/dev-clean.tar.gz", 'DEV_OTHER': "http://www.openslr.org/resources/12/dev-other.tar.gz", 'TEST_CLEAN': "http://www.openslr.org/resources/12/test-clean.tar.gz", @@ -117,7 +117,7 @@ def main(): data_sets = args.data_sets if data_sets == "ALL": - data_sets = "dev_clean,dev_other,train_clean_100,train_clean_360," "train_other_500,test_clean,test_other" + data_sets = 
"dev_clean,dev_other,train_clean_100,train_clean_360,train_other_500,test_clean,test_other" for data_set in data_sets.split(','): logging.info("\n\nWorking on: {0}".format(data_set)) diff --git a/scripts/process_speech_commands_data.py b/scripts/process_speech_commands_data.py new file mode 100644 index 000000000000..f255d80d27a0 --- /dev/null +++ b/scripts/process_speech_commands_data.py @@ -0,0 +1,220 @@ +# Copyright (c) 2019 NVIDIA Corporation +# +# USAGE: python process_speech_commands_data.py --data_root= + +import argparse +import glob +import json +import logging +import os +import re +import tarfile +import urllib.request + +import librosa +import numpy as np + +URL_v1 = "http://download.tensorflow.org/data/speech_commands_v0.01.tar.gz" +URL_v2 = "http://download.tensorflow.org/data/speech_commands_v0.02.tar.gz" + + +def __maybe_download_file(destination: str, source: str): + """ + Downloads source to destination if it doesn't exist. + If exists, skips download + Args: + destination: local filepath + source: url of resource + + Returns: + + """ + if not os.path.exists(destination): + logging.info(f"{destination} does not exist. Downloading ...") + urllib.request.urlretrieve(source, filename=destination + '.tmp') + os.rename(destination + '.tmp', destination) + logging.info(f"Downloaded {destination}.") + else: + logging.info(f"Destination {destination} exists. Skipping.") + return destination + + +def __extract_all_files(filepath: str, data_root: str, data_dir: str): + if not os.path.exists(data_dir): + extract_file(filepath, data_dir) + else: + logging.info(f'Skipping extracting. Data already there {data_dir}') + + +def extract_file(filepath: str, data_dir: str): + try: + tar = tarfile.open(filepath) + tar.extractall(data_dir) + tar.close() + except Exception: + logging.info('Not extracting. 
Maybe already there?') + + +def __process_data(data_folder: str, dst_folder: str, rebalance: bool = False): + """ + To generate manifest + + Args: + data_folder: source with wav files and validation / test lists + dst_folder: where manifest files will be stored + rebalance: + + Returns: + + """ + + if not os.path.exists(dst_folder): + os.makedirs(dst_folder) + + pattern = re.compile(r"(.+\/)?(\w+)\/([^_]+)_.+wav") + all_files = glob.glob(os.path.join(data_folder, '*/*wav')) + + with open(os.path.join(data_folder, 'validation_list.txt'), 'r') as fin: + validation_files = fin.readlines() + + valset = set() + for entry in validation_files: + r = re.match(pattern, entry) + if r: + valset.add(r.group(3)) + + with open(os.path.join(data_folder, 'testing_list.txt'), 'r') as fin: + testing_files = fin.readlines() + + testset = set() + for entry in testing_files: + r = re.match(pattern, entry) + if r: + testset.add(r.group(3)) + + label_count = {} + label_filepaths = {} + + train, val, test = [], [], [] + for entry in all_files: + r = re.match(pattern, entry) + if r: + label, uid = r.group(2), r.group(3) + if label == '_background_noise_': + continue + + sample = (label, entry) + + if uid not in valset and uid not in testset: + if label in label_count: + label_count[label] += 1 + else: + label_count[label] = 1 + + if label in label_filepaths: + label_filepaths[label] += [sample] + else: + label_filepaths[label] = [sample] + + if uid in valset: + val.append(sample) + elif uid in testset: + test.append(sample) + else: + train.append(sample) + + max_command = None + max_count = -1 + for command, count in label_count.items(): + if count > max_count: + max_count = count + max_command = command + + if rebalance: + logging.info(f"Command with maximum number of samples = {max_command} with {max_count} samples") + logging.info(f"Rebalancing dataset by duplicating classes with less than {max_count} samples...") + + for command, samples in label_filepaths.items(): + filepaths = [sample[1] for sample in samples] + + rng = np.random.RandomState(0) + filepaths = np.asarray(filepaths) + num_samples = len(filepaths) + + if num_samples < max_count: + difference = max_count - num_samples + duplication_ids = rng.choice(num_samples, difference, replace=True) + + filepaths = np.append(filepaths, filepaths[duplication_ids], axis=0) + + logging.info(f"Extended class label {command} from {num_samples} samples to {len(filepaths)} samples") + + label_filepaths[command] = [(command, filepath) for filepath in filepaths] + + del train + train = [] + for label, samples in label_filepaths.items(): + train.extend(samples) + + manifests = [ + ('train_manifest.json', train), + ('validation_manifest.json', val), + ('test_manifest.json', test), + ] + + for manifest_filename, dataset in manifests: + with open(os.path.join(dst_folder, manifest_filename), 'w') as fout: + for label, audio_path in dataset: + duration = librosa.core.get_duration(filename=audio_path) + + # Write the metadata to the manifest + metadata = { + "audio_filepath": audio_path, + "duration": duration, + "command": label, + } + json.dump(metadata, fout) + fout.write('\n') + fout.flush() + + logging.info(f"Finished construction of manifest : {manifest_filename}") + + +def main(): + parser = argparse.ArgumentParser(description='Google Speech Command Data download') + parser.add_argument("--data_root", required=True, default=None, type=str) + parser.add_argument('--data_version', required=True, default=1, type=int, choices=[1, 2]) + parser.add_argument('--rebalance', 
required=False, action='store_true') + parser.add_argument('--log', required=False, action='store_true') + parser.set_defaults(log=False, rebalance=False) + args = parser.parse_args() + + if args.log: + logging.basicConfig(level=logging.DEBUG) + + data_root = args.data_root + data_set = "google_speech_recognition_v{0}".format(args.data_version) + data_folder = os.path.join(data_root, data_set) + + logging.info(f"Working on: {data_set}") + + if args.data_version == 1: + URL = URL_v1 + else: + URL = URL_v2 + + # Download and extract + if not os.path.exists(data_folder): + file_path = os.path.join(data_root, data_set + ".tar.bz2") + logging.info(f"Getting {data_set}") + __maybe_download_file(file_path, URL) + logging.info(f"Extracting {data_set}") + __extract_all_files(file_path, data_root, data_folder) + + logging.info(f"Processing {data_set}") + __process_data(data_folder, data_folder, rebalance=args.rebalance) + logging.info('Done!') + + +if __name__ == "__main__": + main() diff --git a/scripts/quartznet_model_for_jarvis.py b/scripts/quartznet_model_for_jarvis.py new file mode 100644 index 000000000000..f40e5e4a76ef --- /dev/null +++ b/scripts/quartznet_model_for_jarvis.py @@ -0,0 +1,8 @@ +# Import NeMo and ASR collection +import nemo +import nemo.collections.asr as nemo_asr + +nf = nemo.core.NeuralModuleFactory(placement=nemo.core.DeviceType.CPU) + +pre_trained_qn_model = nemo_asr.models.ASRConvCTCModel.from_pretrained(model_info="QuartzNet15x5-En-BASE") +pre_trained_qn_model.export('quartznet.nemo', optimize_for_deployment=True) diff --git a/scripts/scp_to_manifest.py b/scripts/scp_to_manifest.py new file mode 100644 index 000000000000..ec297bcc739a --- /dev/null +++ b/scripts/scp_to_manifest.py @@ -0,0 +1,96 @@ +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import json +import logging +import os + +import librosa as l +from sklearn.model_selection import StratifiedShuffleSplit +from tqdm import tqdm + + +""" +This script converts a scp file where each line contains + +to a manifest json file. +Args: +--scp: scp file name +--id: index of speaker label in filename present in scp file that is separated by '/' +--out: output manifest file name +--split: True / False if you would want to split the manifest file for training purposes + you may not need this for test set. 
output file names are _.json + Defaults to False +""" + + +def write_file(name, lines, idx): + with open(name, 'w') as fout: + for i in idx: + dic = lines[i] + json.dump(dic, fout) + fout.write('\n') + logging.info("wrote %s", name) + + +def main(scp, id, out, split=False): + if os.path.exists(out): + os.remove(out) + scp_file = open(scp, 'r').readlines() + + lines = [] + speakers = [] + with open(out, 'w') as outfile: + for line in tqdm(scp_file): + line = line.strip() + y, sr = l.load(line, sr=None) + dur = l.get_duration(y=y, sr=sr) + speaker = line.split('/')[id] + speaker = list(speaker) + speaker = ''.join(speaker) + speakers.append(speaker) + meta = {"audio_filepath": line, "duration": float(dur), "label": speaker} + lines.append(meta) + json.dump(meta, outfile) + outfile.write("\n") + + path = os.path.dirname(out) + if split: + sss = StratifiedShuffleSplit(n_splits=1, test_size=0.1, random_state=42) + for train_idx, test_idx in sss.split(speakers, speakers): + logging.info(len(train_idx)) + + out = os.path.join(path, 'train.json') + write_file(out, lines, train_idx) + out = os.path.join(path, 'dev.json') + write_file(out, lines, test_idx) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--scp", help="scp file name", type=str, required=True) + parser.add_argument( + "--id", help="field num separated by '/' to be considered as speaker label", type=int, required=True + ) + parser.add_argument("--out", help="manifest_file name", type=str, required=True) + parser.add_argument( + "--split", + help="bool if you would want to split the manifest file for training purposes", + required=False, + action='store_true', + ) + args = parser.parse_args() + + main(args.scp, args.id, args.out, args.split) diff --git a/setup.cfg b/setup.cfg index 546d5adbc10b..0e3acd5c601b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,8 +1,37 @@ +# ============================================================================= +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + [aliases] -test = pytest +test=pytest + +# durations=0 will display all tests' execution times, sorted in descending order starting from the slowest one. +# -vv will also display tests with duration = 0.00s +[tool:pytest] +addopts = --verbose --pyargs --durations=0 +markers = + unit: marks unit test, i.e. 
testing a single, well isolated functionality (deselect with '-m "not unit"') + integration: marks test checking the elements when integrated into subsystems (deselect with '-m "not integration"') + system: marks test working at the highest integration level (deselect with '-m "not system"') + acceptance: marks test checking whether the developed product/model passes the user defined acceptance criteria (deselect with '-m "not acceptance"') + docs: marks tests related to documentation (deselect with '-m "not docs"') + skipduringci: marks tests that are skipped during CI as they are addressed by Jenkins jobs but should be run to test user setups [isort] known_localfolder = nemo,tests sections = FUTURE,STDLIB,THIRDPARTY,LOCALFOLDER default_section = THIRDPARTY -skip = setup.py +#TODO tests/unit/core/test_deploy_export.py gets screwed by isort +skip = setup.py, docs/sources/source/conf.py, nemo/utils/__init__.py, tests/unit/core/test_deploy_export.py, docs/docs_zh/sources/source/conf.py diff --git a/setup.py b/setup.py index 1fbe0df0484d..fae6a943613d 100644 --- a/setup.py +++ b/setup.py @@ -33,7 +33,7 @@ def is_build_action(): if len(sys.argv) <= 1: return False - BUILD_TOKENS = ["egg_info", "dist", "bdist", "sdist", "install", "build", "develop"] + BUILD_TOKENS = ["egg_info", "dist", "bdist", "sdist", "install", "build", "develop", "style", "clean"] if any([sys.argv[1].startswith(x) for x in BUILD_TOKENS]): return True @@ -57,18 +57,18 @@ def is_build_action(): __version__, ) -if os.path.exists('README.rst'): +if os.path.exists('nemo/README.md'): + with open("nemo/README.md", "r") as fh: + long_description = fh.read() + long_description_content_type = "text/markdown" + +elif os.path.exists('README.rst'): # codec is used for consistent encoding long_description = codecs.open( os.path.join(os.path.abspath(os.path.dirname(__file__)), 'README.rst'), 'r', 'utf-8', ).read() long_description_content_type = "text/x-rst" -elif os.path.exists('README.md'): - with open("README.md", "r") as fh: - long_description = fh.read() - long_description_content_type = "text/markdown" - else: long_description = 'See ' + __homepage__ @@ -118,7 +118,7 @@ class StyleCommand(distutils_cmd.Command): 'isort ' # These two lines makes isort compatible with black. '--multi-line=3 --trailing-comma --force-grid-wrap=0 ' - f'--use-parentheses --line-width={__LINE_WIDTH} -rc' + f'--use-parentheses --line-width={__LINE_WIDTH} -rc -ws' ) __BLACK_BASE = f'black --skip-string-normalization --line-length={__LINE_WIDTH}' description = 'Checks overall project code style.' @@ -186,6 +186,7 @@ def finalize_options(self): version=__version__, description=__description__, long_description=long_description, + long_description_content_type=long_description_content_type, # The project's main homepage. url=__repository_url__, download_url=__download_url__, diff --git a/tests/__init__.py b/tests/__init__.py index 3c5667660d12..e69de29bb2d1 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,17 +0,0 @@ -# ! /usr/bin/python -# -*- coding: utf-8 -*- - -# Copyright 2020 NVIDIA. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================= diff --git a/tests/asr/test_asr.py b/tests/asr/test_asr.py deleted file mode 100644 index 38bd05826ee8..000000000000 --- a/tests/asr/test_asr.py +++ /dev/null @@ -1,633 +0,0 @@ -# ! /usr/bin/python -# -*- coding: utf-8 -*- - -# Copyright 2020 NVIDIA. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================= -import os -import shutil -import tarfile -import unittest - -from ruamel.yaml import YAML - -import nemo -import nemo.collections.asr as nemo_asr -from nemo.collections.asr.parts import AudioDataset, WaveformFeaturizer, collections, parsers -from nemo.core import DeviceType -from tests.common_setup import NeMoUnitTest - -logging = nemo.logging - - -freq = 16000 - - -class TestASRPytorch(NeMoUnitTest): - labels = [ - " ", - "a", - "b", - "c", - "d", - "e", - "f", - "g", - "h", - "i", - "j", - "k", - "l", - "m", - "n", - "o", - "p", - "q", - "r", - "s", - "t", - "u", - "v", - "w", - "x", - "y", - "z", - "'", - ] - manifest_filepath = os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/asr/an4_train.json")) - featurizer_config = { - 'window': 'hann', - 'dither': 1e-05, - 'normalize': 'per_feature', - 'frame_splicing': 1, - 'int_values': False, - 'window_stride': 0.01, - 'sample_rate': freq, - 'features': 64, - 'n_fft': 512, - 'window_size': 0.02, - } - yaml = YAML(typ="safe") - - @classmethod - def setUpClass(cls) -> None: - super().setUpClass() - data_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/")) - logging.info("Looking up for test ASR data") - if not os.path.exists(os.path.join(data_folder, "asr")): - logging.info("Extracting ASR data to: {0}".format(os.path.join(data_folder, "asr"))) - tar = tarfile.open(os.path.join(data_folder, "asr.tar.gz"), "r:gz") - tar.extractall(path=data_folder) - tar.close() - else: - logging.info("ASR data found in: {0}".format(os.path.join(data_folder, "asr"))) - - # @classmethod - # def tearDownClass(cls) -> None: - # super().tearDownClass() - # data_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/")) - # logging.info("Looking up for test ASR data") - # if os.path.exists(os.path.join(data_folder, "asr")): - # shutil.rmtree(os.path.join(data_folder, "asr")) - - def test_transcript_normalizers(self): - # Create test json - test_strings = [ - "TEST CAPITALIZATION", - '!\\"#$%&\'()*+,-./:;<=>?@[\\\\]^_`{|}~', - "3+3=10", - "3 + 3 = 10", - "why is \\t whitepsace\\tsuch a problem why indeed", - "\\\"Can you handle quotes?,\\\" says the boy", - "I Jump!!!!With joy?Now.", - "Maybe I want to learn periods.", - "$10 10.90 1-800-000-0000", - "18000000000 one thousand 2020", - "1 10 100 1000 10000 100000 1000000", - "Î ĻƠvɆȩȅĘ ÀÁÃ Ą ÇĊňńŤŧș", - "‘’“”❛❜❝❞「 」 〈 〉 《 》 【 】 〔 〕 ⦗ ⦘ 😙 👀 🔨", - "It only costs $1 000 000! 
Cheap right?", - "2500, 3000 are separate but 200, 125 is not", - "1", - "1 2", - "1 2 3", - "10:00pm is 10:00 pm is 22:00 but not 10: 00 pm", - "10:00 10:01pm 10:10am 10:90pm", - "Mr. Expand me!", - "Mr Don't Expand me!", - ] - normalized_strings = [ - "test capitalization", - 'percent and \' plus', - "three plus three ten", - "three plus three ten", - "why is whitepsace such a problem why indeed", - "can you handle quotes says the boy", - "i jump with joy now", - "maybe i want to learn periods", - "ten dollars ten point nine zero one eight hundred zero zero", - "eighteen billion one thousand two thousand and twenty", - # Two line string below - "one ten thousand one hundred one thousand ten thousand one " "hundred thousand one million", - "i loveeee aaa a ccnntts", - "''", - "it only costs one million dollars cheap right", - # Two line string below - "two thousand five hundred three thousand are separate but two " - "hundred thousand one hundred and twenty five is not", - "one", - "one two", - "one two three", - "ten pm is ten pm is twenty two but not ten zero pm", - "ten ten one pm ten ten am ten ninety pm", - "mister expand me", - "mr don't expand me", - ] - manifest_paths = os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/asr/manifest_test.json")) - - def remove_test_json(): - os.remove(manifest_paths) - - self.addCleanup(remove_test_json) - - with open(manifest_paths, "w") as f: - for s in test_strings: - f.write('{"audio_filepath": "", "duration": 1.0, "text": ' f'"{s}"}}\n') - parser = parsers.make_parser(self.labels, 'en') - manifest = collections.ASRAudioText(manifests_files=[manifest_paths], parser=parser,) - - for i, s in enumerate(normalized_strings): - self.assertTrue(manifest[i].text_tokens == parser(s)) - - def test_pytorch_audio_dataset(self): - featurizer = WaveformFeaturizer.from_config(self.featurizer_config) - ds = AudioDataset(manifest_filepath=self.manifest_filepath, labels=self.labels, featurizer=featurizer,) - - for i in range(len(ds)): - if i == 5: - logging.info(ds[i]) - # logging.info(ds[i][0].shape) - # self.assertEqual(freq, ds[i][0].shape[0]) - - def test_dataloader(self): - batch_size = 4 - dl = nemo_asr.AudioToTextDataLayer( - # featurizer_config=self.featurizer_config, - manifest_filepath=self.manifest_filepath, - labels=self.labels, - batch_size=batch_size, - # placement=DeviceType.GPU, - drop_last=True, - ) - for ind, data in enumerate(dl.data_iterator): - # With num_workers update, this is no longer true - # Moving to GPU is handled by AudioPreprocessor - # data is on GPU - # self.assertTrue(data[0].is_cuda) - # self.assertTrue(data[1].is_cuda) - # self.assertTrue(data[2].is_cuda) - # self.assertTrue(data[3].is_cuda) - # first dimension is batch - self.assertTrue(data[0].size(0) == batch_size) - self.assertTrue(data[1].size(0) == batch_size) - self.assertTrue(data[2].size(0) == batch_size) - self.assertTrue(data[3].size(0) == batch_size) - - def test_preprocessor_errors(self): - def create_broken_preprocessor_1(): - nemo_asr.AudioToMelSpectrogramPreprocessor(window_size=2, n_window_size=2) - - def create_broken_preprocessor_2(): - nemo_asr.AudioToMelSpectrogramPreprocessor(window_stride=2, n_window_stride=2) - - def create_broken_preprocessor_3(): - nemo_asr.AudioToMelSpectrogramPreprocessor(n_window_stride=2) - - def create_good_preprocessor_1(): - nemo_asr.AudioToMelSpectrogramPreprocessor(window_size=0.02, window_stride=0.01) - - def create_good_preprocessor_2(): - nemo_asr.AudioToMelSpectrogramPreprocessor( - window_size=None, 
window_stride=None, n_window_size=256, n_window_stride=32, - ) - - self.assertRaises(ValueError, create_broken_preprocessor_1) - self.assertRaises(ValueError, create_broken_preprocessor_2) - self.assertRaises(ValueError, create_broken_preprocessor_3) - create_good_preprocessor_1() - create_good_preprocessor_2() - - def test_kaldi_dataloader(self): - batch_size = 4 - dl = nemo_asr.KaldiFeatureDataLayer( - kaldi_dir=os.path.abspath(os.path.join(os.path.dirname(__file__), '../data/asr/kaldi_an4/')), - labels=self.labels, - batch_size=batch_size, - ) - for data in dl.data_iterator: - self.assertTrue(data[0].size(0) == batch_size) - - dl_test_min = nemo_asr.KaldiFeatureDataLayer( - kaldi_dir=os.path.abspath(os.path.join(os.path.dirname(__file__), '../data/asr/kaldi_an4/')), - labels=self.labels, - batch_size=batch_size, - min_duration=1.0, - ) - self.assertTrue(len(dl_test_min) == 18) - - dl_test_max = nemo_asr.KaldiFeatureDataLayer( - kaldi_dir=os.path.abspath(os.path.join(os.path.dirname(__file__), '../data/asr/kaldi_an4/')), - labels=self.labels, - batch_size=batch_size, - max_duration=5.0, - ) - self.assertTrue(len(dl_test_max) == 19) - - def test_trim_silence(self): - batch_size = 4 - normal_dl = nemo_asr.AudioToTextDataLayer( - # featurizer_config=self.featurizer_config, - manifest_filepath=self.manifest_filepath, - labels=self.labels, - batch_size=batch_size, - # placement=DeviceType.GPU, - drop_last=True, - shuffle=False, - ) - trimmed_dl = nemo_asr.AudioToTextDataLayer( - # featurizer_config=self.featurizer_config, - manifest_filepath=self.manifest_filepath, - trim_silence=True, - labels=self.labels, - batch_size=batch_size, - # placement=DeviceType.GPU, - drop_last=True, - shuffle=False, - ) - for norm, trim in zip(normal_dl.data_iterator, trimmed_dl.data_iterator): - for point in range(batch_size): - self.assertTrue(norm[1][point].data >= trim[1][point].data) - - def test_audio_preprocessors(self): - batch_size = 5 - dl = nemo_asr.AudioToTextDataLayer( - # featurizer_config=self.featurizer_config, - manifest_filepath=self.manifest_filepath, - labels=self.labels, - batch_size=batch_size, - # placement=DeviceType.GPU, - drop_last=True, - shuffle=False, - ) - - installed_torchaudio = True - try: - import torchaudio - except ModuleNotFoundError: - installed_torchaudio = False - with self.assertRaises(ModuleNotFoundError): - to_spectrogram = nemo_asr.AudioToSpectrogramPreprocessor(n_fft=400, window=None) - with self.assertRaises(ModuleNotFoundError): - to_mfcc = nemo_asr.AudioToMFCCPreprocessor(n_mfcc=15) - - if installed_torchaudio: - to_spectrogram = nemo_asr.AudioToSpectrogramPreprocessor(n_fft=400, window=None) - to_mfcc = nemo_asr.AudioToMFCCPreprocessor(n_mfcc=15) - - to_melspec = nemo_asr.AudioToMelSpectrogramPreprocessor(features=50) - - for batch in dl.data_iterator: - input_signals, seq_lengths, _, _ = batch - input_signals = input_signals.to(to_melspec._device) - seq_lengths = seq_lengths.to(to_melspec._device) - - melspec = to_melspec.forward(input_signals, seq_lengths) - - if installed_torchaudio: - spec = to_spectrogram.forward(input_signals, seq_lengths) - mfcc = to_mfcc.forward(input_signals, seq_lengths) - - # Check that number of features is what we expect - self.assertTrue(melspec[0].shape[1] == 50) - - if installed_torchaudio: - self.assertTrue(spec[0].shape[1] == 201) # n_fft // 2 + 1 bins - self.assertTrue(mfcc[0].shape[1] == 15) - - # @unittest.skip("Init parameters of nemo_asr.AudioToMelSpectrogramPreprocessor are invalid") - def test_jasper_training(self): - with 
open(os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/jasper_smaller.yaml"))) as file: - jasper_model_definition = self.yaml.load(file) - dl = nemo_asr.AudioToTextDataLayer( - # featurizer_config=self.featurizer_config, - manifest_filepath=self.manifest_filepath, - labels=self.labels, - batch_size=4, - ) - pre_process_params = { - 'frame_splicing': 1, - 'features': 64, - 'window_size': 0.02, - 'n_fft': 512, - 'dither': 1e-05, - 'window': 'hann', - 'sample_rate': 16000, - 'normalize': 'per_feature', - 'window_stride': 0.01, - } - preprocessing = nemo_asr.AudioToMelSpectrogramPreprocessor(**pre_process_params) - jasper_encoder = nemo_asr.JasperEncoder( - feat_in=jasper_model_definition['AudioToMelSpectrogramPreprocessor']['features'], - **jasper_model_definition['JasperEncoder'], - ) - jasper_decoder = nemo_asr.JasperDecoderForCTC(feat_in=1024, num_classes=len(self.labels)) - ctc_loss = nemo_asr.CTCLossNM(num_classes=len(self.labels)) - - # DAG - audio_signal, a_sig_length, transcript, transcript_len = dl() - processed_signal, p_length = preprocessing(input_signal=audio_signal, length=a_sig_length) - - encoded, encoded_len = jasper_encoder(audio_signal=processed_signal, length=p_length) - # logging.info(jasper_encoder) - log_probs = jasper_decoder(encoder_output=encoded) - loss = ctc_loss( - log_probs=log_probs, targets=transcript, input_length=encoded_len, target_length=transcript_len, - ) - - callback = nemo.core.SimpleLossLoggerCallback( - tensors=[loss], print_func=lambda x: logging.info(f'Train Loss: {str(x[0].item())}'), - ) - # Instantiate an optimizer to perform `train` action - optimizer = self.nf.get_trainer() - optimizer.train( - [loss], callbacks=[callback], optimizer="sgd", optimization_params={"num_epochs": 10, "lr": 0.0003}, - ) - - # @unittest.skip("Init parameters of nemo_asr.AudioToMelSpectrogramPreprocessor are invalid") - def test_double_jasper_training(self): - with open(os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/jasper_smaller.yaml"))) as file: - jasper_model_definition = self.yaml.load(file) - dl = nemo_asr.AudioToTextDataLayer( - # featurizer_config=self.featurizer_config, - manifest_filepath=self.manifest_filepath, - labels=self.labels, - batch_size=4, - ) - pre_process_params = { - 'frame_splicing': 1, - 'features': 64, - 'window_size': 0.02, - 'n_fft': 512, - 'dither': 1e-05, - 'window': 'hann', - 'sample_rate': 16000, - 'normalize': 'per_feature', - 'window_stride': 0.01, - } - preprocessing = nemo_asr.AudioToMelSpectrogramPreprocessor(**pre_process_params) - jasper_encoder1 = nemo_asr.JasperEncoder( - feat_in=jasper_model_definition['AudioToMelSpectrogramPreprocessor']['features'], - **jasper_model_definition['JasperEncoder'], - ) - jasper_encoder2 = nemo_asr.JasperEncoder( - feat_in=jasper_model_definition['AudioToMelSpectrogramPreprocessor']['features'], - **jasper_model_definition['JasperEncoder'], - ) - # mx_max1 = nemo.backends.pytorch.common.SimpleCombiner(mode="max") - # mx_max2 = nemo.backends.pytorch.common.SimpleCombiner(mode="max") - jasper_decoder1 = nemo_asr.JasperDecoderForCTC(feat_in=1024, num_classes=len(self.labels)) - jasper_decoder2 = nemo_asr.JasperDecoderForCTC(feat_in=1024, num_classes=len(self.labels)) - - ctc_loss = nemo_asr.CTCLossNM(num_classes=len(self.labels)) - - # DAG - audio_signal, a_sig_length, transcript, transcript_len = dl() - processed_signal, p_length = preprocessing(input_signal=audio_signal, length=a_sig_length) - - encoded1, encoded_len1 = jasper_encoder1(audio_signal=processed_signal, 
length=p_length) - encoded2, encoded_len2 = jasper_encoder2(audio_signal=processed_signal, length=p_length) - log_probs1 = jasper_decoder1(encoder_output=encoded1) - log_probs2 = jasper_decoder2(encoder_output=encoded2) - # log_probs = mx_max1(x1=log_probs1, x2=log_probs2) - # encoded_len = mx_max2(x1=encoded_len1, x2=encoded_len2) - log_probs = log_probs1 - encoded_len = encoded_len1 - loss = ctc_loss( - log_probs=log_probs, targets=transcript, input_length=encoded_len, target_length=transcript_len, - ) - - callback = nemo.core.SimpleLossLoggerCallback( - tensors=[loss], print_func=lambda x: logging.info(str(x[0].item())) - ) - # Instantiate an optimizer to perform `train` action - optimizer = self.nf.get_trainer() - optimizer.train( - [loss], callbacks=[callback], optimizer="sgd", optimization_params={"num_epochs": 10, "lr": 0.0003}, - ) - - # @unittest.skip("Init parameters of nemo_asr.AudioToMelSpectrogramPreprocessor are invalid") - def test_quartznet_training(self): - with open(os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/quartznet_test.yaml"))) as f: - quartz_model_definition = self.yaml.load(f) - dl = nemo_asr.AudioToTextDataLayer(manifest_filepath=self.manifest_filepath, labels=self.labels, batch_size=4,) - pre_process_params = { - 'frame_splicing': 1, - 'features': 64, - 'window_size': 0.02, - 'n_fft': 512, - 'dither': 1e-05, - 'window': 'hann', - 'sample_rate': 16000, - 'normalize': 'per_feature', - 'window_stride': 0.01, - } - preprocessing = nemo_asr.AudioToMelSpectrogramPreprocessor(**pre_process_params) - jasper_encoder = nemo_asr.JasperEncoder( - feat_in=quartz_model_definition['AudioToMelSpectrogramPreprocessor']['features'], - **quartz_model_definition['JasperEncoder'], - ) - jasper_decoder = nemo_asr.JasperDecoderForCTC(feat_in=1024, num_classes=len(self.labels)) - ctc_loss = nemo_asr.CTCLossNM(num_classes=len(self.labels)) - - # DAG - audio_signal, a_sig_length, transcript, transcript_len = dl() - processed_signal, p_length = preprocessing(input_signal=audio_signal, length=a_sig_length) - - encoded, encoded_len = jasper_encoder(audio_signal=processed_signal, length=p_length) - log_probs = jasper_decoder(encoder_output=encoded) - loss = ctc_loss( - log_probs=log_probs, targets=transcript, input_length=encoded_len, target_length=transcript_len, - ) - - callback = nemo.core.SimpleLossLoggerCallback( - tensors=[loss], print_func=lambda x: logging.info(f'Train Loss: {str(x[0].item())}'), - ) - # Instantiate an optimizer to perform `train` action - optimizer = self.nf.get_trainer() - optimizer.train( - [loss], callbacks=[callback], optimizer="sgd", optimization_params={"num_epochs": 10, "lr": 0.0003}, - ) - - def test_stft_conv(self): - with open(os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/jasper_smaller.yaml"))) as file: - jasper_model_definition = self.yaml.load(file) - dl = nemo_asr.AudioToTextDataLayer(manifest_filepath=self.manifest_filepath, labels=self.labels, batch_size=4,) - pre_process_params = { - 'frame_splicing': 1, - 'features': 64, - 'window_size': 0.02, - 'n_fft': 512, - 'dither': 1e-05, - 'window': 'hann', - 'sample_rate': 16000, - 'normalize': 'per_feature', - 'window_stride': 0.01, - 'stft_conv': True, - } - preprocessing = nemo_asr.AudioToMelSpectrogramPreprocessor(**pre_process_params) - jasper_encoder = nemo_asr.JasperEncoder( - feat_in=jasper_model_definition['AudioToMelSpectrogramPreprocessor']['features'], - **jasper_model_definition['JasperEncoder'], - ) - jasper_decoder = 
nemo_asr.JasperDecoderForCTC(feat_in=1024, num_classes=len(self.labels)) - - ctc_loss = nemo_asr.CTCLossNM(num_classes=len(self.labels)) - - # DAG - audio_signal, a_sig_length, transcript, transcript_len = dl() - processed_signal, p_length = preprocessing(input_signal=audio_signal, length=a_sig_length) - - encoded, encoded_len = jasper_encoder(audio_signal=processed_signal, length=p_length) - # logging.info(jasper_encoder) - log_probs = jasper_decoder(encoder_output=encoded) - loss = ctc_loss( - log_probs=log_probs, targets=transcript, input_length=encoded_len, target_length=transcript_len, - ) - - callback = nemo.core.SimpleLossLoggerCallback( - tensors=[loss], print_func=lambda x: logging.info(str(x[0].item())) - ) - # Instantiate an optimizer to perform `train` action - optimizer = self.nf.get_trainer() - optimizer.train( - [loss], callbacks=[callback], optimizer="sgd", optimization_params={"num_epochs": 10, "lr": 0.0003}, - ) - - def test_clas(self): - with open('examples/asr/experimental/configs/garnet_an4.yaml') as file: - cfg = self.yaml.load(file) - dl = nemo_asr.AudioToTextDataLayer(manifest_filepath=self.manifest_filepath, labels=self.labels, batch_size=4,) - pre_process_params = { - 'frame_splicing': 1, - 'features': 64, - 'window_size': 0.02, - 'n_fft': 512, - 'dither': 1e-05, - 'window': 'hann', - 'sample_rate': 16000, - 'normalize': 'per_feature', - 'window_stride': 0.01, - 'stft_conv': True, - } - preprocessing = nemo_asr.AudioToMelSpectrogramPreprocessor(**pre_process_params) - encoder = nemo_asr.JasperEncoder( - jasper=cfg['encoder']['jasper'], - activation=cfg['encoder']['activation'], - feat_in=cfg['input']['train']['features'], - ) - connector = nemo_asr.JasperRNNConnector( - in_channels=cfg['encoder']['jasper'][-1]['filters'], out_channels=cfg['decoder']['hidden_size'], - ) - decoder = nemo.backends.pytorch.common.DecoderRNN( - voc_size=len(self.labels), - bos_id=0, - hidden_size=cfg['decoder']['hidden_size'], - attention_method=cfg['decoder']['attention_method'], - attention_type=cfg['decoder']['attention_type'], - in_dropout=cfg['decoder']['in_dropout'], - gru_dropout=cfg['decoder']['gru_dropout'], - attn_dropout=cfg['decoder']['attn_dropout'], - teacher_forcing=cfg['decoder']['teacher_forcing'], - curriculum_learning=cfg['decoder']['curriculum_learning'], - rnn_type=cfg['decoder']['rnn_type'], - n_layers=cfg['decoder']['n_layers'], - tie_emb_out_weights=cfg['decoder']['tie_emb_out_weights'], - ) - loss = nemo.backends.pytorch.common.SequenceLoss() - - # DAG - audio_signal, a_sig_length, transcripts, transcript_len = dl() - processed_signal, p_length = preprocessing(input_signal=audio_signal, length=a_sig_length) - encoded, encoded_len = encoder(audio_signal=processed_signal, length=p_length) - encoded = connector(tensor=encoded) - log_probs, _ = decoder(targets=transcripts, encoder_outputs=encoded) - loss = loss(log_probs=log_probs, targets=transcripts) - - # Train - callback = nemo.core.SimpleLossLoggerCallback( - tensors=[loss], print_func=lambda x: logging.info(str(x[0].item())) - ) - # Instantiate an optimizer to perform `train` action - optimizer = self.nf.get_trainer() - optimizer.train( - [loss], callbacks=[callback], optimizer="sgd", optimization_params={"num_epochs": 10, "lr": 0.0003}, - ) - - def test_jasper_eval(self): - with open(os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/jasper_smaller.yaml"))) as file: - jasper_model_definition = self.yaml.load(file) - dl = nemo_asr.AudioToTextDataLayer(manifest_filepath=self.manifest_filepath, 
labels=self.labels, batch_size=4,) - pre_process_params = { - 'frame_splicing': 1, - 'features': 64, - 'window_size': 0.02, - 'n_fft': 512, - 'dither': 1e-05, - 'window': 'hann', - 'sample_rate': 16000, - 'normalize': 'per_feature', - 'window_stride': 0.01, - } - preprocessing = nemo_asr.AudioToMelSpectrogramPreprocessor(**pre_process_params) - jasper_encoder = nemo_asr.JasperEncoder( - feat_in=jasper_model_definition['AudioToMelSpectrogramPreprocessor']['features'], - **jasper_model_definition['JasperEncoder'], - ) - jasper_decoder = nemo_asr.JasperDecoderForCTC(feat_in=1024, num_classes=len(self.labels)) - ctc_loss = nemo_asr.CTCLossNM(num_classes=len(self.labels)) - greedy_decoder = nemo_asr.GreedyCTCDecoder() - # DAG - audio_signal, a_sig_length, transcript, transcript_len = dl() - processed_signal, p_length = preprocessing(input_signal=audio_signal, length=a_sig_length) - - encoded, encoded_len = jasper_encoder(audio_signal=processed_signal, length=p_length) - # logging.info(jasper_encoder) - log_probs = jasper_decoder(encoder_output=encoded) - loss = ctc_loss( - log_probs=log_probs, targets=transcript, input_length=encoded_len, target_length=transcript_len, - ) - predictions = greedy_decoder(log_probs=log_probs) - - from nemo.collections.asr.helpers import ( - process_evaluation_batch, - process_evaluation_epoch, - ) - - eval_callback = nemo.core.EvaluatorCallback( - eval_tensors=[loss, predictions, transcript, transcript_len], - user_iter_callback=lambda x, y: process_evaluation_batch(x, y, labels=self.labels), - user_epochs_done_callback=process_evaluation_epoch, - ) - # Instantiate an optimizer to perform `train` action - self.nf.eval(callbacks=[eval_callback]) diff --git a/tests/asr/test_zeroDS.py b/tests/asr/test_zeroDS.py deleted file mode 100644 index 2a7b05e14b55..000000000000 --- a/tests/asr/test_zeroDS.py +++ /dev/null @@ -1,142 +0,0 @@ -# ! /usr/bin/python -# -*- coding: utf-8 -*- - -# Copyright 2020 NVIDIA. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================= - -import os -import shutil -import tarfile - -import torch -from ruamel.yaml import YAML - -import nemo -import nemo.collections.asr as nemo_asr -from nemo.core.neural_types import * -from tests.common_setup import NeMoUnitTest - -logging = nemo.logging - - -class TestZeroDL(NeMoUnitTest): - labels = [ - "'", - "a", - "b", - "c", - "d", - "e", - "f", - "g", - "h", - "i", - "j", - "k", - "l", - "m", - "n", - "o", - "p", - "q", - "r", - "s", - "t", - "u", - "v", - "w", - "x", - "y", - "z", - " ", - ] - manifest_filepath = os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/asr/an4_train.json")) - yaml = YAML(typ="safe") - - @classmethod - def setUpClass(cls) -> None: - super().setUpClass() - data_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/")) - logging.info("Looking up for test ASR data") - if not os.path.exists(os.path.join(data_folder, "asr")): - logging.info("Extracting ASR data to: {0}".format(os.path.join(data_folder, "asr"))) - tar = tarfile.open(os.path.join(data_folder, "asr.tar.gz"), "r:gz") - tar.extractall(path=data_folder) - tar.close() - else: - logging.info("ASR data found in: {0}".format(os.path.join(data_folder, "asr"))) - - # @classmethod - # def tearDownClass(cls) -> None: - # super().tearDownClass() - # data_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/")) - # logging.info("Looking up for test ASR data") - # if os.path.exists(os.path.join(data_folder, "asr")): - # shutil.rmtree(os.path.join(data_folder, "asr")) - - def test_asr_with_zero_ds(self): - logging.info("Testing ASR NMs with ZeroDS and without pre-processing") - path = os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/jasper_smaller.yaml")) - with open(path) as file: - jasper_model_definition = self.yaml.load(file) - - dl = nemo.backends.pytorch.common.ZerosDataLayer( - size=100, - dtype=torch.FloatTensor, - batch_size=4, - output_ports={ - # "processed_signal": NeuralType( - # { - # 0: AxisType(BatchTag), - # 1: AxisType(SpectrogramSignalTag, dim=64), - # 2: AxisType(ProcessedTimeTag, dim=64), - # } - # ), - # "processed_length": NeuralType({0: AxisType(BatchTag)}), - # "transcript": NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag, dim=64)}), - # "transcript_length": NeuralType({0: AxisType(BatchTag)}), - "processed_signal": NeuralType( - (AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension, 64), AxisType(AxisKind.Time, 64)), - SpectrogramType(), - ), - "processed_length": NeuralType(tuple('B'), LengthsType()), - "transcript": NeuralType((AxisType(AxisKind.Batch), AxisType(AxisKind.Time, 64)), LabelsType()), - "transcript_length": NeuralType(tuple('B'), LengthsType()), - }, - ) - - jasper_encoder = nemo_asr.JasperEncoder( - feat_in=jasper_model_definition['AudioToMelSpectrogramPreprocessor']['features'], - **jasper_model_definition["JasperEncoder"], - ) - jasper_decoder = nemo_asr.JasperDecoderForCTC(feat_in=1024, num_classes=len(self.labels)) - ctc_loss = nemo_asr.CTCLossNM(num_classes=len(self.labels)) - - # DAG - processed_signal, p_length, transcript, transcript_len = dl() - encoded, encoded_len = jasper_encoder(audio_signal=processed_signal, length=p_length) - # logging.info(jasper_encoder) - log_probs = jasper_decoder(encoder_output=encoded) - loss = ctc_loss( - log_probs=log_probs, targets=transcript, input_length=encoded_len, target_length=transcript_len, - ) - - callback = nemo.core.SimpleLossLoggerCallback( - tensors=[loss], 
print_func=lambda x: logging.info(f'Train Loss: {str(x[0].item())}'), - ) - # Instantiate an optimizer to perform `train` action - self.nf.train( - [loss], callbacks=[callback], optimization_params={"num_epochs": 2, "lr": 0.0003}, optimizer="sgd", - ) diff --git a/tests/configs/test_deploy_export.yaml b/tests/configs/test_deploy_export.yaml new file mode 100644 index 000000000000..f1a48531effc --- /dev/null +++ b/tests/configs/test_deploy_export.yaml @@ -0,0 +1,151 @@ +TaylorNet: + header: + full_spec: nemo.backends.pytorch.tutorials.toys.TaylorNet + init_params: + dim: 4 + +TokenClassifier: + header: + full_spec: nemo.collections.nlp.nm.trainables.common.token_classification_nm.TokenClassifier + init_params: + hidden_size: 512 + num_classes: 16 + use_transformer_pretrained: False + +JasperDecoderForCTC: + header: + full_spec: nemo.collections.asr.JasperDecoderForCTC + init_params: + feat_in: 1024 + num_classes: 33 + vocabulary: ["A", "B", "C", "D", "E", " ", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "'"] + +JasperEncoder: + header: + full_spec: nemo.collections.asr.JasperEncoder + init_params: + activation: "relu" + conv_mask: false + feat_in: 64 + + jasper: + - filters: 256 + repeat: 1 + kernel: [11] + stride: [2] + dilation: [1] + dropout: 0.0 + residual: false + + - filters: 256 + repeat: 3 + kernel: [11] + stride: [1] + dilation: [1] + dropout: 0.1 + residual: true + + - filters: 512 + repeat: 2 + kernel: [17] + stride: [1] + dilation: [1] + dropout: 0.1 + residual: true + + - filters: 768 + repeat: 1 + kernel: [25] + stride: [1] + dilation: [1] + dropout: 0.1 + residual: true + + - filters: 1024 + repeat: 1 + kernel: [1] + stride: [1] + dilation: [1] + dropout: 0.2 + residual: false + +dropout: &drop 0.2 +QuartznetEncoder: + header: + full_spec: nemo.collections.asr.JasperEncoder + init_params: + activation: "relu" + conv_mask: false + feat_in: 64 + + jasper: + - filters: 256 + repeat: 1 + kernel: [33] + stride: [2] + dilation: [1] + dropout: *drop + residual: false + separable: true + + - filters: 512 + repeat: 3 + kernel: [63] + stride: [1] + dilation: [1] + dropout: *drop + residual: true + separable: true + + - filters: 512 + repeat: 3 + kernel: [63] + stride: [1] + dilation: [1] + dropout: *drop + residual: true + separable: true + + - filters: 512 + repeat: 3 + kernel: [75] + stride: [1] + dilation: [1] + dropout: *drop + residual: true + separable: true + + - filters: 512 + repeat: 3 + kernel: [75] + stride: [1] + dilation: [1] + dropout: *drop + residual: true + separable: true + + - filters: 512 + repeat: 3 + kernel: [75] + stride: [1] + dilation: [1] + dropout: *drop + residual: true + separable: true + + - filters: 512 + repeat: 1 + kernel: [87] + stride: [1] + dilation: [2] + dropout: *drop + residual: false + separable: true + + - filters: 1024 + repeat: 1 + kernel: [1] + stride: [1] + dilation: [1] + dropout: *drop + residual: false diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 000000000000..273d23ca28b5 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,62 @@ +# ============================================================================= +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +import pytest + +from nemo import logging +from nemo.core import DeviceType, NeuralModuleFactory + + +def pytest_addoption(parser): + """ Additional command-line arguments passed to pytest. For now: --cpu """ + parser.addoption('--cpu', action='store_true', help="pass that argument to use CPU during testing (default: GPU)") + + +@pytest.fixture +def device(request): + """ Simple fixture returning string denoting the device [CPU | GPU] """ + if request.config.getoption("--cpu"): + return "CPU" + else: + return "GPU" + + +@pytest.fixture(scope="class") +def neural_factory(request): + """ Fixture creating a Neural Factory object parametrized by the command line --cpu argument """ + # Get flag. + if request.config.getoption("--cpu"): + device = DeviceType.CPU + else: + device = DeviceType.GPU + # Initialize the default Neural Factory - on GPU. + request.cls.nf = NeuralModuleFactory(placement=device) + + # Print standard header. + logging.info("Using {} during testing".format(request.cls.nf.placement)) + + +@pytest.fixture(autouse=True) +def run_only_on_device_fixture(request, device): + if request.node.get_closest_marker('run_only_on'): + if request.node.get_closest_marker('run_only_on').args[0] != device: + pytest.skip('skipped on this device: {}'.format(device)) + + +def pytest_configure(config): + config.addinivalue_line( + "markers", "run_only_on(device): runs the test only on a given device [CPU | GPU]", + ) diff --git a/tests/core/test_deploy_export.py b/tests/core/test_deploy_export.py deleted file mode 100644 index 303209084802..000000000000 --- a/tests/core/test_deploy_export.py +++ /dev/null @@ -1,179 +0,0 @@ -# ! /usr/bin/python -# -*- coding: utf-8 -*- - -# Copyright 2020 NVIDIA. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================= - -import os -from pathlib import Path - -# git clone git@github.com:microsoft/onnxruntime.git -# cd onnxruntime -# -# ./build.sh --update --build --config RelWithDebInfo --build_shared_lib --parallel \ -# --cudnn_home /usr/lib/x86_64-linux-gnu --cuda_home /usr/local/cuda \ -# --tensorrt_home /home/snikolaev/CODE/TensorRT.BIN --use_tensorrt --enable_pybind --build_wheel -# -# pip install --upgrade ./build/Linux/RelWithDebInfo/dist/*.whl -import onnxruntime as ort -import torch -from ruamel.yaml import YAML - -import nemo -import nemo.collections.asr as nemo_asr -import nemo.collections.nlp.nm.trainables.common.token_classification_nm -from tests.common_setup import NeMoUnitTest - -logging = nemo.logging - - -class TestDeployExport(NeMoUnitTest): - def setUp(self): - """ Setups neural factory so it will use GPU instead of CPU. """ - NeMoUnitTest.setUp(self) - - # Perform computations on GPU. - self.nf._placement = nemo.core.DeviceType.GPU - - def __test_export_route(self, module, out_name, mode, input_example=None): - out = Path(out_name) - if out.exists(): - os.remove(out) - - outputs_fwd = ( - (module.forward(*input_example) if isinstance(input_example, tuple) else module.forward(input_example)) - if input_example is not None - else None - ) - self.nf.deployment_export( - module=module, output=out_name, input_example=input_example, d_format=mode, output_example=outputs_fwd - ) - - tol = 5.0e-3 - out = Path(out_name) - self.assertTrue(out.exists()) - if mode == nemo.core.DeploymentFormat.ONNX: - # Must recompute beause *module* might be different now - outputs_fwd = ( - module.forward(*input_example) if isinstance(input_example, tuple) else module.forward(input_example) - ) - sess_options = ort.SessionOptions() - sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_EXTENDED - ort_session = ort.InferenceSession(out_name, sess_options, ['CPUExecutionProvider']) - inputs = dict() - input_names = list(module.input_ports) - for i in range(len(input_names)): - input_name = ( - "encoded_lengths" - if type(module).__name__ == "JasperEncoder" and input_names[i] == "length" - else input_names[i] - ) - inputs[input_name] = ( - input_example[i].cpu().numpy() if isinstance(input_example, tuple) else input_example.cpu().numpy() - ) - logging.info('Execution Providers: ', ort_session.get_providers()) - outputs_scr = ort_session.run(None, inputs) - outputs_scr = torch.from_numpy(outputs_scr[0]).cuda() - elif mode == nemo.core.DeploymentFormat.TORCHSCRIPT: - scr = torch.jit.load(out_name) - if isinstance(module, nemo.backends.pytorch.tutorials.TaylorNet): - input_example = torch.randn(4, 1).cuda() - outputs_fwd = module.forward(input_example) - outputs_scr = ( - scr.forward(*input_example) if isinstance(input_example, tuple) else scr.forward(input_example) - ) - elif mode == nemo.core.DeploymentFormat.PYTORCH: - module.load_state_dict(torch.load(out_name)) - module.eval() - outputs_scr = ( - module.forward(*input_example) if isinstance(input_example, tuple) else module.forward(input_example) - ) - - outputs_scr = outputs_scr[0] if isinstance(outputs_scr, tuple) else outputs_scr - outputs_fwd = outputs_fwd[0] if isinstance(outputs_fwd, tuple) else outputs_fwd - self.assertLess((outputs_scr - outputs_fwd).norm(p=2), tol) - - if out.exists(): - os.remove(out) - if mode == nemo.core.DeploymentFormat.PYTORCH and out.with_suffix(out.suffix + ".json").exists(): - os.remove(out.with_suffix(out.suffix + 
".json")) - - def __test_export_route_all(self, module, out_name, input_example=None): - if input_example is not None: - self.__test_export_route(module, out_name + '.onnx', nemo.core.DeploymentFormat.ONNX, input_example) - self.__test_export_route(module, out_name + '.pt', nemo.core.DeploymentFormat.PYTORCH, input_example) - self.__test_export_route(module, out_name + '.ts', nemo.core.DeploymentFormat.TORCHSCRIPT, input_example) - - def test_simple_module_export(self): - simplest_module = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) - self.__test_export_route_all( - module=simplest_module, out_name="simple", input_example=None, - ) - - def test_TokenClassifier_module_export(self): - t_class = nemo.collections.nlp.nm.trainables.common.token_classification_nm.TokenClassifier( - hidden_size=512, num_classes=16, use_transformer_pretrained=False - ) - self.__test_export_route_all( - module=t_class, out_name="t_class", input_example=torch.randn(16, 16, 512).cuda(), - ) - - def test_jasper_decoder(self): - j_decoder = nemo_asr.JasperDecoderForCTC(feat_in=1024, num_classes=33) - self.__test_export_route_all( - module=j_decoder, out_name="j_decoder", input_example=torch.randn(34, 1024, 1).cuda(), - ) - - def test_hf_bert(self): - bert = nemo.collections.nlp.nm.trainables.common.huggingface.BERT(pretrained_model_name="bert-base-uncased") - input_example = ( - torch.randint(low=0, high=16, size=(2, 16)).cuda(), - torch.randint(low=0, high=1, size=(2, 16)).cuda(), - torch.randint(low=0, high=1, size=(2, 16)).cuda(), - ) - self.__test_export_route_all(module=bert, out_name="bert", input_example=input_example) - - def test_jasper_encoder(self): - with open("tests/data/jasper_smaller.yaml") as file: - yaml = YAML(typ="safe") - jasper_model_definition = yaml.load(file) - - jasper_encoder = nemo_asr.JasperEncoder( - conv_mask=False, - feat_in=jasper_model_definition['AudioToMelSpectrogramPreprocessor']['features'], - **jasper_model_definition['JasperEncoder'] - ) - - self.__test_export_route_all( - module=jasper_encoder, - out_name="jasper_encoder", - input_example=(torch.randn(16, 64, 256).cuda(), torch.randn(256).cuda()), - ) - - def test_quartz_encoder(self): - with open("tests/data/quartznet_test.yaml") as file: - yaml = YAML(typ="safe") - quartz_model_definition = yaml.load(file) - - jasper_encoder = nemo_asr.JasperEncoder( - feat_in=quartz_model_definition['AudioToMelSpectrogramPreprocessor']['features'], - **quartz_model_definition['JasperEncoder'] - ) - - self.__test_export_route_all( - module=jasper_encoder, - out_name="quartz_encoder", - input_example=(torch.randn(16, 64, 256).cuda(), torch.randint(20, (16,)).cuda()), - ) diff --git a/tests/core/test_neural_factory.py b/tests/core/test_neural_factory.py deleted file mode 100644 index 3973ce00464e..000000000000 --- a/tests/core/test_neural_factory.py +++ /dev/null @@ -1,37 +0,0 @@ -# ! /usr/bin/python -# -*- coding: utf-8 -*- - -# Copyright 2020 NVIDIA. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================= - -import nemo -from tests.common_setup import NeMoUnitTest - - -class TestNeuralFactory(NeMoUnitTest): - def test_create_single_module(self): - instance = self.nf.get_module(name="TaylorNet", collection="toys", params={"dim": 4}) - self.assertTrue(isinstance(instance, nemo.backends.pytorch.tutorials.TaylorNet)) - - def test_create_simple_graph(self): - dl = self.nf.get_module( - name="RealFunctionDataLayer", collection="toys", params={"n": 10000, "batch_size": 128}, - ) - fx = self.nf.get_module(name="TaylorNet", collection="toys", params={"dim": 4}) - loss = self.nf.get_module(name="MSELoss", collection="toys", params={}) - - x, y = dl() - y_pred = fx(x=x) - _ = loss(predictions=y_pred, target=y) diff --git a/tests/core/test_neural_modules.py b/tests/core/test_neural_modules.py deleted file mode 100644 index 04e82c2802bf..000000000000 --- a/tests/core/test_neural_modules.py +++ /dev/null @@ -1,52 +0,0 @@ -# ! /usr/bin/python -# -*- coding: utf-8 -*- - -# Copyright 2019 NVIDIA. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================= - -import nemo -from nemo.core.neural_types import ChannelType, NeuralType -from tests.common_setup import NeMoUnitTest - - -class NeuralModulesTests(NeMoUnitTest): - def test_call_TaylorNet(self): - x_tg = nemo.core.neural_modules.NmTensor( - producer=None, producer_args=None, name=None, ntype=NeuralType(('B', 'D'), ChannelType()) - ) - - tn = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) - # note that real port's name: x was used - y_pred = tn(x=x_tg) - self.assertEqual(y_pred.producer, tn) - self.assertEqual(y_pred.producer_args.get("x"), x_tg) - - def test_simplest_example_chain(self): - data_source = nemo.backends.pytorch.tutorials.RealFunctionDataLayer(n=10000, batch_size=1) - trainable_module = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) - loss = nemo.backends.pytorch.tutorials.MSELoss() - x, y = data_source() - y_pred = trainable_module(x=x) - loss_tensor = loss(predictions=y_pred, target=y) - - # check producers' bookkeeping - self.assertEqual(loss_tensor.producer, loss) - self.assertEqual(loss_tensor.producer_args, {"predictions": y_pred, "target": y}) - self.assertEqual(y_pred.producer, trainable_module) - self.assertEqual(y_pred.producer_args, {"x": x}) - self.assertEqual(y.producer, data_source) - self.assertEqual(y.producer_args, {}) - self.assertEqual(x.producer, data_source) - self.assertEqual(x.producer_args, {}) diff --git a/tests/data/an4_speaker.tar.gz b/tests/data/an4_speaker.tar.gz new file mode 100644 index 000000000000..912bde741626 Binary files /dev/null and b/tests/data/an4_speaker.tar.gz differ diff --git a/tests/data/asr.tar.gz b/tests/data/asr.tar.gz index 944a91344165..f15bc61b936a 100644 Binary files a/tests/data/asr.tar.gz and b/tests/data/asr.tar.gz differ diff --git a/tests/data/contextnet_32.yaml b/tests/data/contextnet_32.yaml new file mode 100644 
index 000000000000..5e56e0d44048 --- /dev/null +++ b/tests/data/contextnet_32.yaml @@ -0,0 +1,77 @@ +model: "ContextNet" +sample_rate: 16000 +repeat: &repeat 2 +dropout: &dropout 0.0 +stride: &stride 2 + + +AudioToTextDataLayer: + max_duration: 16.7 + trim_silence: true + + train: + shuffle: true + + eval: + shuffle: false + max_duration: null + +AudioToMelSpectrogramPreprocessor: + window_size: 0.025 + window_stride: 0.01 + window: "hann" + normalize: "per_feature" + n_fft: 512 + features: 80 + dither: 0.00001 + pad_to: 16 + stft_conv: false + +SpectrogramAugmentation: + freq_masks: 2 + time_masks: 10 + freq_width: 27 + time_width: 0.05 + +ContextNetEncoder: + activation: "relu" + conv_mask: true + + jasper: + - filters: 32 + repeat: 1 + kernel: [5] + stride: [1] + dilation: [1] + dropout: 0.0 + residual: false + separable: true + se: true + se_context_size: -1 + + - filters: 32 + repeat: *repeat + kernel: [5] + stride: [1] + dilation: [1] + dropout: *dropout + residual: true + separable: true + se: true + se_context_size: 256 + + - filters: 32 + repeat: *repeat + kernel: [5] + stride: [*stride] + dilation: [1] + dropout: *dropout + residual: true + separable: true + se: true + se_context_size: -1 + stride_last: true + residual_mode: "stride_add" + +labels: [" ", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", + "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "'"] diff --git a/tests/data/get_squad.sh b/tests/data/get_squad.sh deleted file mode 100755 index db376c1b2a21..000000000000 --- a/tests/data/get_squad.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -mkdir squad_data -cd squad_data -wget https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v1.1.json -wget https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json diff --git a/tests/data/get_tatoeba_eng_sentences.sh b/tests/data/get_tatoeba_eng_sentences.sh deleted file mode 100755 index 0b344133f80b..000000000000 --- a/tests/data/get_tatoeba_eng_sentences.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash -mkdir tatoeba_data -cd tatoeba_data -wget https://downloads.tatoeba.org/exports/sentences.csv -grep -P "\teng\t" sentences.csv > english_sentences.csv -head -n 900000 english_sentences.csv > train_sentences.csv -tail -n +900001 english_sentences.csv > eval_sentences.csv -rm english_sentences.csv -rm sentences.csv diff --git a/tests/data/pred_real/train.pred b/tests/data/pred_real/train.pred new file mode 100644 index 000000000000..c1b343aabf7f --- /dev/null +++ b/tests/data/pred_real/train.pred @@ -0,0 +1,3000 @@ +A Republican strategy to counter the re-election Obama +Republican leaders justified their policy the need to combat electoral fraud. +However, the Centre considers this a myth, stating that electoral fraud is rarer in the States than the number of people killed by +Indeed, Republican lawyers identified only 300 cases of electoral fraud in the United States in a decade. +One thing certain: these new provisions will have a negative impact on voter turn-out. +In this sense, the measures will partially undermine the American democratic system. +Unlike in Canada, the American States are responsible for the organisation of elections in the United States. +It is in this spirit that a of American governments have new laws since 2009 making the registration or voting more +This phenomenon gained momentum following the November 2010 elections, which saw 675 new Republican representatives added 26 States. 
+As a result, 180 bills restricting the exercise of the right to vote in 41 States were introduced in alone. +The new election laws require voters to show a photo ID card and proof of citizenship. +Furthermore, laws also reduce early voting periods, invalidate the right to register as a voter on election day and withdraw the right to vote of citizens with a criminal record. +Before the elections, no US State required voters to show a photo ID card. +Indiana was the first State to impose such a requirement. +In 2008, the Supreme Court of the United States the constitutionality of the Indiana law. +The Republican authorities were quick to extend this practice to other States. +Over the past two years, they sponsored bills in States to force voters to show a photo ID card. +It is to note that, unlike Quebec, American citizens do not have a universal ID card such as the health insurance card. +In fact, 11% of American citizens, i.e. 21 million people of voting age, do possess a photo ID card issued by a government agency of their +In addition, five million new voters in do not have such +And it often costs over a hundred dollars to obtain the required identity card. +The new restrictions disproportionately affect young people, minorities and people with low incomes. +In fact, 25% of Americans, of those earning less than $35,000; 18% of citizens over 65 and 20% of voters 18 to 29 years do not the required photo ID card. +And that's not all. +Students, voters considered to be voting more Democratic candidates, are not allowed in several States to use the photo ID card issued by their institution. +On the other hand, these same allow fishing or hunting club members, who vote more Republican, to use the cards issued by these clubs when they vote. +Prior to no State required proof of citizenship to vote. +Arizona was the first to introduce such a requirement. +Since 2011, a dozen States have adopted laws requiring voters to prove they are citizens. +These measures are intended to limit the Hispanic vote. +However, it appears that two out of three Hispanic voters favour the Democratic party. +What is more, in 2011 Republican legislators sponsored laws abolishing the registration of on in eight States. +In addition, they limited the right of individuals and groups to provide assistance to voters wishing to register. +These restrictions are not without consequence. +For example, the 2004 general election, voter campaigns contributed to registering around 10 million +However, the measures adopted since 2009 have led a 17% drop in the registration rate of new voters 2010 compared to 2006. +In addition, Republican legislators have enacted laws in five other States at reducing early voting period. +For example, during the 2008 general in Florida, 33% of early voters were African-Americans, who accounted however for only 13% voters in the State. +The same applied to Hispanics. +These represented only 11% of voters, but 24% of citizens who voted early. +On the other hand, 76% of voters were white but these represented only 46% of early voters. +Of course, Democratic legislators and their supporters vigorously opposed the adoption of laws restricting voter registration. +Several bills were blocked by vetoes of Democratic governors. +The United States Attorney General intervened to suspend the most controversial laws. +They were able to partially limit the damage. +For example, only 16 out of 34 States have adopted laws requiring the presentation of a ID card. 
+However, the new rules put in place will make it more difficult to exercise the right to vote in 2012. +Democratic critics denounce the partisan character of the laws that have been passed and they see a clear objective of influencing the results in key States. +A 2011 Brennan Centre report shows that the States that have adopted these laws represent 171 of the 270 needed in the electoral to win the Presidency. +It is too early to say with certainty that these changes in the electoral system will have significant impacts on the the 2012 presidential elections. +But thing is certain: these new provisions will have a negative the turn-out. +In this sense, the measures will partially undermine the American democratic system. +Prostate cancer screening: take the test or not? +Indeed, the PSA test sometimes shows erroneous results false negative or even false positive results, which involve unnecessary medical interventions. +Enough to make reluctant men to take screening tests. +Take the test or not? +We asked two specialists for their opinion. +In studies conducted in the United States, there was a lot of contamination between control groups, so it is to interpret the data and make firm recommendations. +Another study, this time a European one, concluded that there a in mortality between patients who screened and those who were not. +This study also showed, with follow-up after 12 years, that it is 30 and 40% more likely for metastases to occur in absence of screening. +I therefore recommend the test from age 50, or 40 if you have a direct relative who previously had prostate cancer. +African-American men are also more at risk. +The key is to the right decision once cancer has been detected. +are aggressive cancers and others that are indolent. +The patient really needs to be made to understand the degree of risk of his cancer, by offering him the available, not treating prostate cancers that are not long-term life threatening, and opting instead, in such cases, for active monitoring of the disease. +Today, many men whom cancer has been detected will not be treated because their cancer is not aggressive and is not life threatening. +Active monitoring will be suggested, and if the disease progresses, they will be offered treatment. +More and more, specific criteria are being determined in order to decide who should or should not treated. +Therefore I recommend taking the test. +But the important is to have a discussion with your doctor to determine whether or not to take it. +In collaboration with the Société internationale d'urologie [SIU], Movember has created a tool that makes it possible evaluate the pros and cons of the PSA test. +You can download the document (in for time being, a [French] translation will be available at this address: http://ca.movember.com/fr/mens-health/prostate-cancer-screening +Preventing the disease +Unfortunately, there is no miracle for preventing cancer. +Despite the progress in research, the adoption of healthy living habits remains the best way to the risk of suffering from it. +It estimated that if everyone ate well and exercised enough, 30% of cancers could be prevented. +"If no more people smoked, this rate increase to at least 50%," says André Beaulieu, spokesman for the Canadian Cancer Society. +On the other hand, it is estimated that roughly 10% of cancers are hereditary. +Some are also completely unexplained. +For Canadian Cancer Society, the fight against tobacco remains a despite the decrease in the number of smokers. 
+Cigarettes are linked to 85% of lung cancer cases. +It is also a risk factor for a number of +This massively damages people's health. +"Even today, there are 1.5 million Quebec" deplores spokesperson André Beaulieu. +Encouraging data: 10 years after giving up smoking, the risk of dying from cancer drops by half. +Weight +Overweight and obesity are also conducive to the onset of the disease, according to the SCC. +They can increase the risks cancer of the breast, colon and rectum, oesophagus, pancreas and uterus. +"Research shows that the regular practice of physical activity throughout your life protects against colon cancer" it is also said. +Diet +The organisation also recommends limiting your consumption of red meat. +In large amounts, it increases the risks of developing colo-rectal cancer. +Likewise, so do cured meat products, and these should be avoided. +The conservation meat by drying or can cause the formation of carcinogens. +"They can damage cells the body and lead to the development of cancer" it explained. +Vitamins +In recent a number of scientists have studied the links between vitamin supplements and cancer. +For the time being however their research is inconclusive. +Studies on vitamin E are contradictory, according to the SCC. +While one noted a decrease in the risk of prostate cancer, another noted an increase. +Also the effect vitamin D on cancer is not clear. +In Mr Beaulieu emphasises the importance of discussing your concerns and family history with your doctor. +"Taking a screening test doesn't give you cancer." +The Higgs boson revealed +The announcement of the probable discovery of the Higgs boson created quite a stir last summer, and with reason. +Indeed, it is believed that this is part of the mechanism responsible for the mass of everything in the Universe, no less. +Also it is the particle whose existence is predicted by the Standard Model - our or "less worse" of the and behaviour of matter - but which has not yet been observed empirically. +But for physicists, it is still not completely sure that it really is the Higgs. +We know without a shadow of a doubt that it is a new authentic particle, and greatly the Higgs boson predicted by the Standard Model. +In new data unveiled this week at a large physics Congress in Kyoto seem confirm this, but there are still insufficient data to be perfectly sure. +But let's suppose that it really is the Higgs, since the chances of being mistaken seem slim, and see what it is. +In our world, there is a fatally unavoidable law which states that two things cannot meet at the same place at the same time. +no way break this rule - and don't try too hard, you'll go mad. +However, even though particle physics is a very strange world, turns that it also has a law of the same kind: the exclusion principle, which that two particles cannot the same space at the same time if they in same "quantum state" - this "state" consisting roughly of certain of their characteristics. +Based on this, physicists classify particles into two categories. +In one corner we have good citizens fermions, who wisely obey the Pauli principle. +While lurking in the other are the bosons, a nasty band of anarchists who respect nothing - at all events, not this principle, which means that they can indeed be found in the same place at the same time. 
+These bosons are then divided into two groups, according to the Berkeley Labs Particle Adventure site (absolutely extraordinary, by the way): mesons, which we will not discuss and "force particles" by which the great forces of are and to which the Higgs boson may be somehow related. +These bosons, it must be stressed here, are not all exotic bugs as you might think. +In fact, if you can read this it is thanks to an extraordinarily banal boson: the photon, or the "light particle" which is the "messenger" of the electromagnetic force. +When, in fact, a particle having an electric charge accelerates or changes direction, this "disturbs" the electromagnetic field in this specific place, rather like a pebble thrown in a pond. +This "disturbance" produces an electromagnetic wave (of light, infrared, ultraviolet etc.), and this wave is nothing other than a photon - and thus one of the "force carrier" bosons. +More stable field +The same applies to the Higgs boson, with the that it is another the Higgs field, which must be "disturbed" for the boson to appear. +Now, this Higgs field is much, much more stable than the electromagnetic field; to it is to achieve very high energy levels, rather like a frozen pond which would need a very large rock to wrinkle the surface. +Which is why a huge particle accelerator the one at CERN - Large Hadron Collider is a ring with a 27km circumference! - is needed to achieve such energy levels. +The analogy with the electromagnetic field is again useful explaining the relationship between the Higgs and +In fact not all particles, all materials, with electromagnetic field. +Some, such as magnets, do so, but others don't - a piece of paper, for example, will never stick to a fridge. +And likewise, not all particles interact with the Higgs field: those that do so have mass, while the others (such as the photon) do not. +Now, what is it that all this research "can bring"? asks Plamondon. +For science, it serves to check the validity of the Standard Model (SM), and also allows physicians to examine any discrepancies between the observations and predictions of the SM. +A number of people, moreover, fervently hope that some will be found, because the slightest difference could open a door to a "new physics" and plug certain holes in the Model. +This, it must be still has huge shortcomings, offering no explanation for gravity (oops!) or dark matter, which forms approximately 80% of the matter in the Universe (re-oops!). +But to date such discrepancies have been found at CERN. +Repercussions +The of this research on daily life of the man in the street are more difficult to predict, it would be wrong assume that there won't be any. +Remember: in the very early 60s, the pioneers of the laser at Bell Laboratories not suspect the revolution that would be triggered by their work. +They had an inkling of the scientific applications, but nothing as to the rest. +In fact, the late Willard Boyle - a physicist who worked at Bell Labs, where the laser was invented in 1960, and who himself developed the first continuous laser (the first pulsed) 1962 - told us that initially the laser was as a gadget." +Just imagine... +And then, applications can also come from all the instrumentation that surrounds research. +For example, the same Willard Boyle developed a small light sensor in 1969, during his work in +This sensor, although this was not at all the original intention, serves as an "eye" to all digital cameras worldwide, and earned him the Nobel physics prize in 2009. 
+This does not of course mean that the activities of the LHC will necessarily transform our lives, but it does mean that, actually, you never know... +Palliative care - The best way to die... | Le Devoir +With its Dying with Dignity Commission, Quebec recently discussed the delicate issue of the end of life. +The debate is due to resume as a bill is being prepared. +However, in this vital area, much remains to be done. +Le Devoir attempted to look more closely. +Just a few weeks ago, Mr L. lived alone in his Montérégie apartment. +The festering prostate cancer had allowed him a two-year respite. +"They gave me five years to live, I've made it to seven," he says, with mixed emotions, lying in his bed at the Victor-Gadbois care home in Beloeil, where he arrived the previous day. +"But it's still a shock, you can never be prepared for it," he adds. +The disease is doing its work: huge weakness which prevents him going to the toilet alone, and even eating alone. +Sitting in front of an appetising lunch, he consents to being helped to eat, resigned. +Courageous, he even manages to smile, talks to the strangers bustling around him, bringing him his medication, offering him a bath. +The courage of an ordinary death. +"What I want most is to be cured of my diarrhoea, it's humiliating," he confided. +A few hours later, the team found a cure for this illness. +"During our lives, we learn that a man pisses standing up," says Pierre Brodeur, psychologist at the Victor-Gadbois home. +Regressing to the stage of a child, for some people, is an unacceptable humiliation. +"It depends on the person's ability" to accept the regression, he says. +Because, in the opinion of a number of people working in palliative care, great moments occur at the heart of such regression. +Patients at the Victor-Gadbois palliative care home all suffer from cancer. +They have a maximum life expectancy of three months. +At this stage, the team of doctors and nurses surrounding them no longer provides so-called "curative" care. +For Mrs A., 89 years old, the worst fear is to die "conscious and suffocating." +But the disease has made me discover my children. +"I have fine children," she adds. +"I don't wish for anything more in life," she says, before accepting having a mask put on to help her breathe. +She looks forward nevertheless, in the next few days, to a last visit by her son coming from Italy. +At Victor-Gadbois, a group of volunteers provides bodily care and help with feeding. +This is palliative care, given when there is nothing else that can be done. +To make death more comfortable. +In Quebec, there is one palliative care bed for every 11,700 inhabitants. +This is very few when we know that we will all die one day. +Here, life continues under the best possible conditions, explains Dr Christiane Martel, one of the doctors at the home. +Whether at a physical comfort, emotional or spiritual level. +A person who is dying will accept being helped to drink brandy or Pepsi, whatever their tipple. +Diabetics no longer need to control their blood sugar. +And death is part of everyday life. +Yesterday evening, a beer was served to Mr X, who died during the night. +This morning, it is his son who will finish the beer at the feet of the deceased. +"We help relatives as much as patients," says Nathalie Savard, Director of Care. +At the Victor-Gadbois home, one day follows another but no two are alike.
+Along with a 93-year-old man who is savouring his last meeting with his family, sitting firmly wedged in his pillows while toasts are drunk in his honour, another man is dying tragically, surrounded by his parents, his wife and his two children, after having tried everything to survive. +"For six months, there have always been three to five beds which are occupied by cancer patients aged less than 45," says a concerned Dr Christiane Martel. +53% of patients admitted to the Victor-Gadbois home come from their homes, 47% from hospital. +Lack of access to palliative care +It is said that 77% of Canadians simply have no access to palliative care, which is care designed to ease the pain when a patient has reached the terminal stage of life, be it at home, in hospital or in a care home. +And a number of organisations, such as the Victor-Gadbois home and the Palliative Care Society in Greater Montreal, specialise more or less exclusively in care provided to cancer patients. +It is precisely this large gap in Quebec health care which has made a number of palliative care physicians fear the adoption of a law on euthanasia and assisted suicide. +Since October, a manifesto, signed by palliative care luminaries including Dr Balfour Mount and Dr Bernard Lapointe, has been circulating to demonstrate their opposition to such an initiative. +According to Dr Christiane Martel, the Quebec health system is not effective enough to ensure that everyone will be entitled to quality palliative care before it is accepted to proceed to euthanasia. +Recently, she says, I saw a patient spend 14 days in emergency, in great pain, without anything being done to ease her suffering. +I'm afraid that patients ask to die because they don't receive adequate care. +And at the same time, some oncologists work relentlessly on their patients until the last day, despite the worst prognoses. +Hélène Richard's survival hopes were already minimal when she ended her gruelling chemotherapy. +When I announced to my oncologist that I was stopping the treatment, she told me she regretted that I had given up fighting, she said. +However, she had told me I was finished! +Not all-powerful care +Dr Martel believes that 90% of patients asking to die thank care-givers for not having acceded to their request after they have been relieved of their pain by a palliative care team. +But it must be said that palliative care is not absolutely all-powerful in the treatment of pain. +According to Elsie Monereau, Palliative Care Director with the Palliative Care Society in Greater Montreal, patients are resistant to treatment against pain in 8% of cases. +At the very end of life, physicians then often resort to palliative sedation, which is equivalent to putting the patient to sleep until the time of death, either sporadically or permanently. +We can no longer pretend to understand this part of their suffering. +Increasingly, an unrelieved patient will have the option of having palliative sedation. +Patients who are not relieved always say the same thing: "I want to die." +But this does not necessarily mean "I want you to euthanise me," it means "I want to be relieved." +This report was made possible thanks to a journalism award from the Canada health research institutes. +Widespread real estate scandals in Quebec +Day after day highway officials, building contractors, political party fund-raisers and Italian mafia specialists tell what they know of a formidable "system," combining the building industry, government officials, politicians, trade unionists and organised crime. +An "industry" which has cost Quebec taxpayers dearly, especially in the 1990s and 2000s.
+"It is curious how system is crumbling since we took drastic measures" says Duchesneau ironically, a Quebec politician and former Montreal of Police. +It was through him that the scandal broke in 2011, in an in-depth investigation into corruption related to road construction contracts in Quebec, to which the liberal Prime Minister the time, Jean Charest, had consented only reluctantly. +The "Duchesneau report" established a direct link between industry, under-the-table financing of parties and bribery officials. +"Since the inquiry opened in 2010, he says, the Ministry of Transport alone reportedly saved a billion dollars on contracts," with certain people curbing their get a share! +The Charbonneau Commission "has already brought down two mayors" he adds, hoping that it will succeed in "revealing the schemes behind the individuals." +A permanent anti-corruption unit, created in 2011 +The Permanent Anti-Corruption Unit, created in 2011, is also coupled with its army of analysts, and auditors. +Plus the "Marteau squad" policemen who, since 2009, have apparently led the Montreal "sewer soft pedal on the inflation of contracts... +In recent weeks, it has conducted a series of searches and brought charges fraud and corruption against municipal politicians, such as Frank Zampino and Richard Marcotte, Mayor of a suburban town. +Next on the list is apparently Gilles who has just resigned from his post as Mayor Laval, third largest city in Quebec. +He is suspected of pocketing repeated bribes in exchange for public contracts. +Others formally accused are Montreal highway engineers and Italian entrepreneurs, including Tony Accurso and Lino Zambito. +The latter caused a sensation by explaining mechanics of the contracts "system" to the commission. +He himself paid 3% of the value of the contracts obtained in Montreal to an intermediary linked to the mafia who turn paid the money to Union Montréal, Mayor Gérald Tremblay's party. +Mr Zambito has handed money out freely in the 2000s, giving over 88,000 Canadian dollars (roughly 68,000 euros) to provincial parties, especially the Liberals then in power. +He also admitted having organised an illegal for former Liberal Deputy-Prime Minister, Nathalie Normandeau. +Sewer contracts with inflated costs +In Montreal, the corruption "system" ran smoothly. +Gilles Surprenant, former public works engineer, it in detail in front of the commission: in ten years, he from construction companies gifts, invitations to trips, golf tournaments, restaurants, hockey and bribes totalling 736,000 dollars, in exchange for sewer contracts of which he inflated the costs. +Other highway officials admitted having their palms greased by inflating invoices by 30 to 40%, and by false +Then an organiser of the Mayor's party, Martin Dumont, accused Mr Tremblay of deliberately closed his eyes to a parallel budget feeding his coffers with dirty money. +Following revelations, Mr Tremblay resigned in early November, plunging Montreal into a major crisis. +Chantal Rouleau was one of first women in Montreal to raise the alarm. +Mayor of borough of Rivière-des-Prairies, to the East of the island, she in 2010 against the sale of municipal land bought for 5 million dollars and resold for... 1.6 million to developers, at the height of the real estate boom. +70% dirty money in election +On the investigation which will eventually be she says she "is following a thread in order to find out how the system - infiltrated by ants - works, to put a stop to the gangrene and catch the culprits." 
+The process, she says, is "painful but positive." +The wound is cleaned, but Montreal would need its own investigative unit with ongoing monitoring, to avoid the return of these questionable practices. +How to clean house. +Properly. +Jacques Duchesneau notes for his part that "officials stole hundreds of millions of dollars," but he is especially concerned about the role of "elected people aware of the scheme," when they were not up to their necks in the scam! +Estimating the share of dirty money in the financing of election campaigns in Quebec at 70%, he says dryly: "I was told that it was only a pale reflection of reality." +The Quebec government proposes to limit donations to parties to 100 dollars, but this will not change the situation, he says: "Until election expenses are strictly limited, there will be dirty money in politics." +He advocates a complete overhaul of the system for granting public contracts and funding: "We can't go any lower; getting to the bottom of things, with courage, will help to rebuild the house on more solid foundations, with controls and laws." +Although this story tarnishes the international image of Quebec and Montreal, Mr Duchesneau invites anyone laughing to look in their own backyard... +"PSG is not FC Barcelona!" +This season, you have taken on a new stature with PSG. +How do you explain this? +It can be explained by individual awareness but also by the new dimension of PSG. +Some great players have arrived. +Every day I'm making progress alongside them. +The technical staff has also brought me a lot. +Day by day, all these things help me raise my level of play. +In a match, it's easier. +Everything moves very fast in football. +But I don't get worked up. +From my debut at the INF pre-training centre to my transfer to Saint-Etienne, I've moved step by step. +So you benefit from the competition brought in by Carlo Ancelotti... +This summer's recruits are used to playing matches at a high level. +They also know that every training session is crucial. +Which is what makes a player like me want to face up and give my best. +On the other hand, Carlo Ancelotti gives me a lot as regards my position. +He's supported by deputies like Makelele, who played in the same position as me. +Is Ancelotti the man for the job? +Definitely. +Ancelotti inspires respect among all the experts. +Today he has no equal in Ligue 1, and he's one of the best coaches in Europe. +He has masses of experience and has won many titles with top clubs. +He's worked with great players. +I think he will bring titles to Paris. +In January, I had an encouraging discussion with him. +I was just coming back from a series of injuries. +The confidence he gives me also explains my performance. +What importance do you attach to the first part of the season for PSG? +In Ligue 1, Lyon overtook us at the top. +But we're waiting on the sidelines. +One of our main goals is the Champions League: we qualified for the last 16 in the right way. +What is the club's goal in this competition? +We'll try to go as far as possible. +From now on, anything can happen. +But we will have something to say against some very good European teams. +First of all, we want to finish top in our pool, ahead of Porto, to have home advantage in the last 16 match. +Can PSG become a top European club in the short term? +It already has the budget... +To become a top European club, Paris needs to win titles and keep it up over time. +Today, that isn't the case. +Financially, PSG has the means to make it happen. +In Ligue 1, would not winning the title, like last season, be a big failure? +Definitely, it would be a major disappointment.
+This year, we're really committed to winning the championship. +We weren't far away last season. +In May, there was great disappointment because we were good enough to finish first. +It was a terrific season. +We finished with 79 points. +Normally, 79 points is good enough to be top... +But another team, Montpellier, had an even more fantastic season. +I think this is the year. +Even if big teams like Marseille, Lyon and Bordeaux are competing for the title, I think we have the weapons to win. +Do you think the media expect too much of PSG? +It's normal for them to expect a lot from us given what's been invested and the players we have. +We totally accept it. +After we won 4-0 at home against Troyes and they still found things to blame us for, that's definitely a bit frustrating. +You wonder what more people expect. +You're never going to win every weekend. +We're not FC Barcelona! +We're trying to implement a game project. +It takes time to build a team. +The Champions League proved we could hold our own. +Look at Manchester City who, for two seasons, have failed to qualify for the last 16, despite also having spent huge amounts! +Based on the amounts invested, you should be 15 points ahead at the winter break! +That would be to ignore our opponents and the French Championship. +Lyon and Marseille, who were no good last season, were "boosted" by the new PSG. +This proves that Ligue 1 is exciting. +I hope that in May we will be able to smile in saying that, despite all the difficulties, we finally did it. +PSG seem totally dependent on the exploits of Zlatan Ibrahimovic. +So much so that people say there is a "Zlatan dependence." +This means Ibrahimovic is very successful and scores a lot of goals. +That's why he came, and he's proving he's the star of Ligue 1. +He's shown everywhere he went that he is a great player, a world star. +Within the group, we respect the man and the player. +And also he respects the men he has around him. +What he has done is truly exceptional. +It pushes others to raise their level of play. +Thiago Silva, who is one of the best defenders in the world, also helps everyone else progress. +How did you get on at Euro 2012 with the France team? +A disappointment. +I really wanted to play in this Euro. +Unfortunately, my injury prevented me from getting any game time. +I saw some things there and came out stronger. +Today, I'm playing well in selection matches. +Which is what I've been hoping for since my baptism with the Blues. +I've learned the lessons from what happened in the Ukraine and I now owe it to myself to have exemplary behaviour. +What do you think about Didier Deschamps's first few months in charge of the Blues? +He has the results he wanted. +We're well placed in the World Cup qualifying group. +The coach is tough, close to the players, and inspires them to win. +Like Laurent Blanc was. +But I don't want to make any comparisons. +Blanc had achieved his goal when we qualified for the Euro. +I hope Didier Deschamps will take the Blues to Brazil. +Did the good draw (1-1) snatched in Spain, on 16 October, represent a founding match? +That match gave us confidence. +Everybody fought for everybody. +Before that shock in Spain, I'd never experienced a match like that in my career. +With Bitcoin, pay and sell without banks +The opposite of current monetary exchanges, based on central banks, transactions and processing fees among the parties involved. +In addition, as often in these technologies, a political vision is palpable: the belief that the current monetary system, made of banking monopolies, leads to financial crises.
+In fact, Bitcoin, invented by Satoshi Nakamoto (probably a pseudonym), is both a virtual currency (but convertible into dollars, euros) and a secure exchange protocol like BitTorrent, which allows peer-to-peer file exchange. +Around 200,000 transactions have already been recorded via 15,000 computers on the network. +Close to a thousand web sites accept bitcoins as donations or means of payment. +The bitcoin exchange rate, after reaching a peak of 30 dollars (23 euros) in June 2011, fell to 2 dollars five months later, returning today to around a dozen dollars (rates are listed on the bitcoincharts.com site). +Nothing very impressive, compared to global transactions in real currency or financial products. +However, the European Central Bank (ECB) took an interest in it in a report on virtual currencies published in October. +It describes bitcoin as "the most successful virtual currency," "in competition with the dollar or the euro" and "similar to conventional currencies." +Bitcoin differs from other types of virtual currency such as 'credits', used to progress in a video game which you win by playing or which you can buy (and sometimes exchange in return). +The social network Facebook has also developed this kind of system. +But, on each occasion, a central authority controls and handles the exchanges. +With Bitcoin, all nodes of the network are both custodians of the book of accounts, currency issuers, and buyers and sellers. +How does the network operate? +Each transaction between two users is actually carried out between two electronic addresses like with an e-mail. +Except that a user can choose a different address for each payment, thereby ensuring anonymity. +A set of information associated with this transaction is signed electronically by a dual-key encryption system. +So the network can verify the authenticity of the transaction. +Using the contents of the file, it is also possible to ensure that the exchanged bitcoins exist in the public book of accounts, broadcast across the entire network. +The key step is entering the new transaction in the book. +It passes through the resolution of a mathematical challenge issued to the computers, and the winner, a kind of interim central banker, will have the privilege of adding this extra line. +This is a file hashing phase, i.e. the transformation of a large file into a shorter and unique digital imprint. +Computers "take" the new transaction and add a number to it, then "hash" it all up. +The goal being to find the number that gives a special imprint (lots of zeros at the beginning). +Once this number has been found, the other nodes can easily check that it is the right one. +The transaction is then indestructibly linked to the chain of all the other transactions; any modification would alter the imprint. +If a user wanted to defraud by paying twice with the same money very quickly (in less than ten minutes), only one of the two transactions would be validated by the network - the other would remain an orphan because the two have different imprints. +The computer that resolves the challenge wins 50 bitcoins. +To avoid inflation, this award is regularly divided by two, probably by the end of 2012. +The number of bitcoins in circulation is therefore limited to 21 million, but they are divisible down to the hundred millionth, which leaves some margin... +The difficulty of the challenge is also raised with each increase in computing power. +The life of the network has had its ups and downs. +Websites providing services for Bitcoin have been attacked and bitcoins in deposits stolen.
+"The loophole used is not the protocol itself" says Pierre Noizat reassuringly, who has just launched Paymium, a real currency payment company that uses the Bitcoin network. +The ECB also highlights the possibilities of money laundering using this anonymous service. +But cash also has this weakness. +Major players like Wikipedia donations of nature. +Others, such as the WordPress blog platform, accept them. +Recently, Adi Shamir and Dorit Ron, from the Weizmann Institute in Israel, analysed the accounting books and showed that almost 80% of bitcoins do not circulate. +In November, "huge were launched. +"Thirty thousand were exchanged" Jon Holmquist, who works for Coinabul, which converts bitcoins to gold. +Pierre Noizat, also author of an educational book on this currency, has a lot of faith in the potential of this technology as a transaction network. +His system, Paytunia, is equivalent to a credit card (in real money) or a contactless payment by mobile, but it uses to validate transactions, which are thus cheaper. +Also the user manages his identity and can therefore be anonymous. +The system is easy to implement by merchants, who do not need to install new terminals or software. +They just need to provide an address a phone can "photograph and recognise" says Pierre Noizat, who he has thousands of users. +There is a general movement to reappraise hierarchical systems for more horizontal systems. +"It take time for Bitcoin to become firmly established, but 2013 be turning point," he predicts. +The ECB, in its says it reassess the various risks, currently regarded as high, in the event of the currency's success. +We got out of Afghanistan. +What now? +French troops have left their area of responsibility in Afghanistan (Kapisa and Surobi). +and the Americans are due to follow in late 2014. +It is time for the Afghan army to resume possession of its territory and the Afghan people to choose their future, without expecting us to do everything. +It is mainly Afghan peasants that we have punished by regarding them as terrorists. +And ourselves, with our 88 soldiers killed, plus the wounded, the maimed. +The Taliban is composed of foreign extremists, former leaders in refuge in Pakistan, but often peasants who the presence of foreign armed forces, like in the time of the Soviets. +They want to their traditions, both ancient and archaic, even though they have been joined by Jihadists, Pakistanis, Arabs, Uzbeks, Tajiks. +Tolerated, sometimes assisted, by local insurgents, the latter will no longer be so when Westerners become more scarce. +The departure of French troops from the Nijrab base, which I observed from the top of hills of almond trees planted with French funding, was carried out in an fashion. +Convoys of trucks armoured vehicles reached Kabul without being attacked, overflown by helicopters. +There will be no wave of the Taliban in by the end of 2014. +Circumstances have changed their irresistible advance between 1994 and 1996. +At that time Kabul empty, the country being torn apart by the struggles between different +Their takeover of the country had been perceived then as a sort of liberation, a return to safety. +Afghanis paid the price of the obscurantism of these peasants by the organisation of Al-Qaeda, but their situation has not improved today. +Former Mujahidin, the Afghan Government and the current Taliban are allied in the desire to keep women in inferior position. +main anti-Soviet leaders returned to power in 2001. 
+They became profiteers, seizing government land to resell as building land to refugees returning from Iran and Pakistan, benefiting from huge American outsourcing contracts. +They have become discredited; what is more, most of them did not fight themselves. +The people, as I heard in the countryside, want a Government that is not made up of thieves. +Many young people want to leave, as those who were able to benefit from American largesse will leave: the flight of capital is considerable. +The young people are tired of war and its ideologies. +They have rubbed shoulders with the modern world during their exile in Iran or Pakistan, and appreciated the benefits. +Roughly 65% of the population is less than 25 years old; Kabul now has 5 million people, a fifth of the total population. +In towns and cities, the state schools are full, with girls and boys alike. +It will be necessary to provide work for those young people who no longer want to return to the obscurantism of the former parties or the corruption of certain leaders. +All of them, including the armed opponents, are partial to mobile phones; television, with its Turkish soap operas that show a modern world, is everywhere. +The army is now present. +Will the authorities who command it be considered legitimate? +Former commanders of the anti-Soviet struggle are already thinking about restoring provincial militias, which will escape the central power. +Afghanistan, a land of mountains, with strong local identities, should be able to benefit from a certain decentralisation, in the image of the Western nations, but the United States wanted to turn it into a centralised State, with strong presidential power, abolishing the post of Prime Minister, which had existed since the 1964 Constitution. +President Karzai does not want any foreign controls, particularly on the occasion of the elections in 2014. +But, since the 50s and already well before, his country has been dependent on foreign aid. +No industries have been re-established, no dams are in good condition, no major irrigation systems have been repaired. +Everything is imported; nothing is produced, apart from fruit and vegetables. +Priority is left to private initiative. +In a country ruined by thirty years of war, government control over the infrastructure would have been necessary. +The rumour was spread that Afghanistan had huge mineral wealth. +This only added to the feeling that the Westerners were only there to seize it. +With no energy to process the iron ore or copper on site, or means of transport to export it across the mountains, there is no mining. +The Chinese have already almost left the Mes Aynak mine, leaving international archaeologists (funded by the World Bank) to search the huge Buddhist site and remain the largest employers in the province. +One day it will also be necessary for Afghanistan and Pakistan, on which imports and exports largely depend, to restore normal relations. +The departure of French combat troops was completed on 20 November. +The new cooperation treaty provides for the continuation of traditional aid: girls' high school, boys' high school, French Department at the University, French Institute, cooperation in the military, legal, medical and agricultural fields, support to the archaeological Delegation. +Since 2009, to try to "win hearts and minds" and achieve the impossible task of reconciling aid and offensive actions, a "civil-military actions" service from the Ministry of defence (Cimic), closed in 2012, has carried out, and continues to carry out successfully, through a small French NGO, many community and agricultural rehabilitation projects in dozens of mountain villages.
+These projects, involving large numbers of local labour, have helped to contain the insurgency: irrigation, wells, drinking water, reforestation, fruit trees, soil protection and increase in cultivable areas. +What will we leave as a souvenir, after two billion euros of military spending? +A much more modest budget would contribute to improving local living conditions, which are hard in these valleys often located over 2,000 metres above sea level. +The Embassy has received dozens of written requests for small agricultural projects from local communities in Kapisa province. +To be in a position to free themselves from the uprising led by foreign groups, which is what farmers told me they want, a small amount of civil aid should be maintained in their favour, well controlled and directly affecting them. +A Constitution by force in Egypt +A new gamble for President Mohammed Morsi. +While Egypt remains more divided than ever around the constitutional declaration, which temporarily grants him full powers, he has decided to go for broke. +Taking everyone by surprise, he announced on Wednesday that the Constituent Assembly would vote on its text the following day. +Just a week ago, the head of State had given the Assembly two more months to finish its work. +For two years Egypt has relied on a provisional text, amended several times, and this has weakened institutional stability and led to legal imbroglios. +This new initiative has only served to enhance the divide in the country. +According to his opponents, the President is persevering in his "autocratic delirium," continuing to "go back on his word" and "violate the law." +His supporters affirm that this is the quickest way to put an end to the institutional and political crisis, by speeding up the transition process. +A referendum is due to be held within the next two weeks. +A very short period, which forces the Brothers to abandon their plan to explain the text, article by article, to the Egyptians. +For the President, it is also a way to achieve popular and democratic legitimacy while the dispute rages throughout the country. +Mohammed Morsi seems convinced that Egyptians will vote favourably, as he stated in an interview with the American weekly Time. +Particularly since a hasty vote smacks of an ultimatum to the Egyptian people: "Either you vote for my text, or I keep full powers," these powers supposedly expiring following adoption of the Constitution. +It was in a strange atmosphere that 85 members of the Constituent Assembly, with a large Islamist majority, voted on the text yesterday. +Most of the liberals were missing. +In mid-November, shortly before the constitutional declaration, they had slammed the door, feeling they had failed to assert their views. +Representatives of human rights, religious minorities or civil society had done likewise. +In order to obtain a quorum, 11 members, alternates, were hastily added yesterday morning. +Some of them are very close to the Muslim Brotherhood. +Not surprisingly, the articles were for the most part voted unanimously. +Commentators were also amused that one of the only diversions of the day was expressed with regard to... the hour of prayer, some Committee members feeling that the Constituent Assembly clock was wrong. +The text, which was still being voted on yesterday evening, has 234 articles. +The main focus of attention, article 2, remains in the final analysis identical to that of the 1971 Constitution, stipulating that "the principles of sharia are the main source of law." +Salafist parties, for which the establishment of Islamic law is a major claim, were hoping to replace "the principles" by "the rules," which would have allowed stricter application.
+For the Islamists, the fact that this article was not amended is a guarantee of their goodwill and their respect for the other elements of Egyptian society. +The liberals, in response, see in it only a communication coup. +Because in their opinion the Islamisation of the Constitution is done through other articles. +They refer in particular to article 220, which grants Al-Azhar University an advisory role, with particular reference to verifying the conformity of the laws with sharia. +According to Egypt specialist Sophie Pommier, this is worrying because "the people called upon to advise are not elected and have no democratic legitimacy. +This suggests the beginnings of a theocracy." +The liberals' fears are also fuelled by the fact that the next Rector of the university will probably be much less moderate than the current one. +"For the time being, there is no concrete religious implication. +With this Constitution, things remain under civil rule. +Most of the lawyers who worked on this text are not Islamic law scholars but academics, some trained in the French system," qualifies Alexis Blouet, who is writing a thesis on the Egyptian constitutional transition. +But he acknowledges that "there may be some ambiguity around article 220, because the terms used borrow from the religious vocabulary. +Reference is made in particular to "fiqh" [Islamic jurisprudence, Editor's note]. +And the question could be asked in future to what extent civil judges are competent to pronounce on it." +Beyond its religious aspect, the text voted on is highly criticised due to the extensive powers it grants to the President of the Republic. +The Muslim Brothers argue that they are significantly reduced compared to what they were under the former regime. +Another issue: the powers conferred on the army. +In accordance with the wishes of the military, the Defence budget review will not be submitted to Parliament, but to a National Defence Council. +Nor will trials of civilians be banned in military tribunals, as requested by associations for the defence of human rights. +These associations also voice their concerns about the text, which they consider repressive. +The offence of blasphemy is maintained and insults are now prohibited, which could have serious consequences on freedom of expression, particularly for the press. +In addition, no longer does any of the articles refer to the protection of women, highlights Heba Morayef, from Human Rights Watch. +In her opinion, the only positive point is the prohibition of torture in article 36. +The word was not included in the previous Constitution. +While the Egyptian President was speaking yesterday evening on television, demonstrations are planned for this afternoon. +Supporters of the Head of State will march on Saturday. +In Israel, holy places, the omphalos and a sea of saline water await Ukrainian tourists +The Holy Land combines the splendour of biblical truths, modern comfort and primeval nature. +AiF [Argumenti i Fakti] newspaper highlighted the five most important reasons why it is a must to visit Israel. +Let's worship the holy places +It is worth visiting the River Jordan where Jesus was baptized. +It is considered that all who enter this baptism "bath" are blessed by God. +Galilee is the place where Jesus performed his magic: turned water into wine at a wedding, walked on water, calmed a storm, and filled the fishermen's nets. +This is also where Jesus came before his disciples, both before and after the resurrection. +But the biggest number of holy places is in Jerusalem. +Believers walk through the Way of Grief or Via Dolorosa.
+It starts by the Antonia Fortress - Praetorium - where the judgement took place, and leads us along the streets of the Old Town to the Church of the Holy Sepulchre on Golgotha - the place of the crucifixion, the Stone of Unction and the place of Jesus' burial. +This is also the location of the symbolic Christian omphalos, which symbolizes the salvation of mankind. +The Holy Cross Monastery in Jerusalem is erected at the site that, according to Christian legend, yielded the tree used to make the cross for Jesus' crucifixion. +Jerusalem has the most holy place for the Jews as well - the Wailing Wall, which remained from a temple destroyed by the Romans in 70 AD. +According to tradition, people of different faiths leave notes here with their wishes, which are then fulfilled. +Travel along a vertical +Ruins of the Massada remain from a secret refuge from enemies, built by Herod in 25 BC for his family. +They are located on cliffs in the mountains at an elevation of 450 m above sea level. +They can be reached on foot only by those who are into mountain climbing. +Others are delivered to this historical mountaintop by a cableway. +In the north of the country, at an elevation of 1600-2040 m, there is a famous ski resort called Hermon, which fills up with tourists in winter months. +A shuttle brings people to it from the foot of the mountain. +The total length of ski pistes is 45 km. +According to an ancient legend, pagan gods used to live on the mountain. +Visit unique museums +This country has about 300 museums. +You won't be able to visit all of them on one trip. +But at least the five most interesting ones are worth a visit. +Among them is the Museum of Israel, located close to the Knesset (Parliament). +It has ancient Qumran manuscripts and Dead Sea scrolls found in the caves of the Judean desert, along with about 500,000 archaeological and anthropological artefacts. +The Museum of Art in Tel-Aviv is also interesting. +Its exhibits include a wide range of impressionists and expressionists like Monet, Pissarro, Renoir, Sisley, Cezanne, Matisse, Modigliani, Chagall, Picasso. +In Akko, you can visit the bath museum Al-Basha, which consists of several rooms of ancient Turkish baths with models of visitors and bath attendants of the time. +In Caesarea, it is worth visiting the unique private Ralli Museum, where you can enjoy the sculptures of Dali and Rodin. +There are no tour guides or gift shops. +Entry is free of charge, and contributions are strictly not allowed. +The fifth one is the Holocaust Museum or Yad Vashem in Tel-Aviv, which tells one of the most dramatic stories in history. +The most tragic section is the children's memorial, built in memory of 1.5 million children killed in concentration camps and gas chambers. +You go in and find yourself in complete darkness. +Stars are glimmering, +and you listen to the names of Jewish children and the countries where they died. +Ukraine is mentioned there too. +Wellness +There are three resort areas in Israel, located on the coasts of the Mediterranean, Red, and Dead Seas. +Each has swimming pools, aqua parks, dolphinaria and oceanaria. +It is notable that one can swim in the Red Sea even in winter months, because the water temperature does not drop below 21 degrees and the air warms to 23 degrees. +The Dead Sea is even warmer, and people swim in it all year round. +Incidentally, it is the most unusual sea in the world, located at the lowest point of the planet - 417 m below sea level. +Its azure water is saline and easily keeps you afloat, even if you don't know how to swim. +The surrounding landscapes are surreal in their beauty.
+People come here to undergo a course of treatment using salt water wraps and medicinal muds, and to improve their health if they have dermatitis, allergies, asthmas, eczemas, arthritis, bronchitis, or diabetes, or to restore balance. +Touch the mysteries of antiquity +They are preserved in the old section of Tel-Aviv - in the town of Jaffa on the Mediterranean Sea. +The famous sea route connecting Egypt, Syria, Anatolia, and Mesopotamia runs through it. +The city is mentioned in ancient Greek and ancient Egyptian legends. +According to legends, this is where Noah built his ark and Perseus saved the beauty Andromeda, with whom he lived a long and happy life. +Tourists really like to wander the narrow streets named after signs of the zodiac. +They say, if you touch the walls on the street of your sign, fortune will come to you. +In Jaffa, you can meet newlyweds who come from all over Israel and even from other countries for photo sessions. +And in Caesarea - the city of King Herod - you can walk around a Roman theatre, "capture" the Crusader fortress. +During the Roman period, Caesarea was the main city of Judea and the residence of Roman prefects, including Pontius Pilate. +The carefully restored theatre is now used for evening concerts and opera performances. +A note for the tourist +When you go to Israel, don't worry about your bad English knowledge: approximately 30% of the country's population speaks Russian. +For the trip, it is better to take dollars, not euros, because they are easily exchanged for shekels (currently 1 dollar = 3.8 shekels). +City transportation is mainly buses, but Jerusalem has a high-speed tram, and Haifa has the only subway line in the country, consisting of six stops and connecting the upper town with the lower. +In essence, it is an underground cable railway. +A ticket for any type of city transportation costs 6 shekels, and you can ride for 1.5 hours with transfers. +According to the Jewish tradition, Sabbath is celebrated in Israel. +Between Friday evening and the sunset on Saturday, markets, stores, and public transportation stop working. +The work week starts on Sunday morning. +Many cafes, restaurants and hotels have only kosher food, with no pork, seafood, fish with no scales, or dishes that combine milk with meat. +There is a wide selection of dishes from lamb and beef, soups and desserts cooked using coconut milk, traditional hummus paste, various sauces, falafel (balls made of ground chickpeas), fruits and vegetables. +The streets of Israel don't have homeless dogs. +But there are many cats, which walk around lazily. +In the evening, they can even be seen sleeping on roofs of parked cars. +These pussycats like crowded places and do not refuse treats. +Car rental, depending on car type, costs from 37 (Hyundai Getz) to 188 (Audi A6, Volvo S80) dollars a day. +Plus insurance of 15 dollars a day. +Bike rental costs 15 shekels a day. +Museum entrance costs 30 shekels on average. +In numbers +In 2012, three million tourists from around the world visited Israel. +Visitors and pilgrims arrive mostly from the USA, Russia, France, Germany, England, and Ukraine. +Between January and October 2012, the number of Ukrainian tourists visiting the Holy Land was 51% higher than the similar figure in 2010, before the removal of the visa regime on February 9, 2011. +Only the "high and mighty" make it to Moscow: migrants save money for language tests +While deputies and human rights activists argue about the purpose of the law on mandatory language testing, the country already has con artists who sell fake certificates. +Every year, 13 million migrant workers come to Moscow, St. Petersburg and other cities in Russia.
+Mostly these are citizens of Central Asia: Uzbekistan, Tajikistan and Turkmenistan. +Their only goal is to earn money to support families back home. +A new law came into effect on December 1, which obliges every migrant worker to pass a Russian language test. +For the moment, this law applies only to those who intend to work in services, housing and utility services, household services, and retail. +But with time - as promised by the Federal Migration Service - tests will become mandatory for all non-residents. +In addition to language, Russian history and basics of the legal system will be tested. +Language knowledge will have to be confirmed both to receive and to extend the work permit. +An exception is in effect for citizens of countries where Russian is a state language. +People who received education certificates and diplomas before the fall of the USSR in 1991 are also exempt under the law. +Purpose, doomed fate, and the protection of rights +Testing points will be operating under the auspices of the Pushkin Institute of Russian Language, the Peoples' Friendship University of Russia, Moscow State University (MGU), St. Petersburg State University (SPbGU), and other Russian education institutions. +Migrants can take the tests in all cities; more than 160 centres have been opened. +The initiative to introduce the testing was supported by State Duma members and the Federal Migration Services. +But human rights activists asked the question repeatedly in the press before the law came into force: what will it actually achieve? +What will the obligation to know Russian change for the Russians and for the non-residents? +First of all, according to representatives of the migration service, this will make it possible to reduce the number of people suffering from labour slavery. +Many speak about protection of the rights of work migrants, explains the Head of the representative office of the Migration Services of Russia, Viktor Sebelev. +Rights protection should begin before their departure. +Only the system of organized selection will enable us to solve 90% of the problems of foreign workers. +Migrants without profession, education, who do not know Russian, who do not have a medical certificate, start to have problems. +If a migrant does not understand the language, says Sebelev with certainty, he is doomed to come across unconscientious people, who, pretending to help, will force upon him a "ticket" to terrible, cramped barracks where many others like him will suffer without food and documents, slaving away 12-14 hours a day. +We receive many complaints from our migrants. +"They are promised one thing at home, but when they arrive, they are lied to, their passports are taken, they are not paid what they were promised," confirms the Head of the Main Migrant Labour Administration of the Migration Service of Tajikistan Tolib Sharipov. +Do not be angry, boss! +Nonetheless, many citizens of Central Asian countries, who plan to go to work in Russia, admit that not only is their understanding of the language of the country where they are going not good, but they can barely write in their own language. +Naturally, this is not so much their fault, but due to poverty: very few Turks, Uzbeks, and Tajiks can afford even a basic education. +Their families don't even have food to feed their children, not to mention decent clothing, shoes, and supplies. +After reaching adolescence, these kids go to work at the first opportunity. +It is hard if language knowledge is poor, they admit. +"You feel humiliated and inferior." +But human rights activists note one important point about the law on language.
+Testing will be conducted only for migrants who have legal status. +If they have no status, there will be no testing, nor any official work in the future. +In the meantime, most of the migrant workers continue to live in Russia illegally. +"Welcome, or No Unauthorized Entry" +Many of the foreigners assert that receiving official status in our country is not that easy. +The reason lies in bureaucratic hurdles and the already mentioned language difficulties. +In addition, legalization costs money: from 12,000 to 16,000 rubles. +Whereas a fake registration is done quickly and costs only one and a half thousand. +Officers of the Russian Police know that we mainly have fake papers, without registration, hence the extortion. +"They ask for a hundred or two for cigarettes, tea," Umed Khushkadamov, a citizen of Tajikistan, shared with journalists. +"Roll up, don't be cheap, get your artwork" +On the day of the law's entry into effect it turned out that not only migrant registration documents can be fake. +A few forged certificates about passing language tests have been seized by Federal Migration Services officers already. +Forged documents are printed on a standard printer. +Naturally, they were not free: each of the migrants, who had hoped to facilitate the task of passing the tests in this way, paid seven thousand rubles for them. +It is two and a half times more than the process of official testing, which costs three thousand. +Government officials and human rights activists agree that the main goal in the near future is to protect the system from corruption, so that the certificates could not just be bought. +For the moment, the authorities can promise migrant workers who could not pass the test the first time that they will be given time to complete a basic language course. +In addition, those who come without Russian language knowledge will be offered work in areas that do not require active communication with people. +The Ministry of the Interior does not put weapons from the illegal market back into circulation +The share of crime involving legal weapons is low +The Russian Ministry of the Interior is proposing to toughen up the law for owners of civil weapons. +This is the reaction of authorities to recent incidents: shots fired at weddings, where there were no casualties, and the massacre staged by Moscow lawyer Vinogradov, resulting in the death of seven people. +Policemen want to prohibit the carrying of weapons in public places and raise the legal age for weapons licensing from 18 to 21. +The idea was supported by the head of the Duma Committee on Safety and Anti-Corruption, Irina Yarovaya, who promised that the amendments to the law on weapons will be brought to the State Duma in the near future. +Not everyone is happy that the Russian authorities are trying to fight the problem by "tightening the screws." +An open letter appeared online, whose authors - representatives of different social rifle organizations - demand that the "senseless toughening" be abandoned. +The percentage of crime involving registered weapons is minimal, said criminal law expert Vasily Lesnikov to BBC Russia. +According to the Ministry of the Interior's statistics, 142 crimes using firearms registered with law enforcement agencies have been committed in the first six months of 2012, whereas 1,168,000 crimes have been recorded in total for this period. +The authors of the open letter are certain that the toughening of the law in the area of civil weapons will not prevent the criminal from going to the "black" market. +According to them, one can find any weapon at a low price right now. +Nonetheless, the Ministry of the Interior asserts that the situation of the spread of arms is under control.
+Suppliers: from plants to officers +The "black" market of weapons is replenished through several channels. +There are five such channels, says colonel Viktor Baranets, who has worked in the Ministry of Defence and the General Staff for 10 years. +Screenshot of the site that accepts orders +First: "army loot," weapons that were stolen during the fighting in the Caucasus. +"Weapons were sold by Russian officers and by the Caucasians," says Baranets. +Next are "black weapons," stolen by criminals from representatives of defence agencies. +Baranets explains that this covers weapons taken from police warehouses and those stolen directly from law enforcement agencies' employees. +Illegal arms are also taken to be sold from military warehouses. +Explosions have often been heard at military warehouses. +"There are proven theories that some of the fires were intentional, in order to cover the shortage," says the former military man. +Manufacturers of weapons make their contribution, according to Baranets. +"There are so many private weapons factories now, which do not endure competition on the international market and throw weapons from under the counter to the black market, including in Moscow," says the expert. +Another source of the "black" market is trafficking. +An especially large number of guns and machine guns come from poor countries like Kyrgyzstan. +"There's production there, sometimes handmade; and a mafia has formed, which has organized a stream," explains the former military man. +Where do the weapons come from? +Experts counted the approximate share of each of the channels of supply of illegal weapons to the "black" market. +A report about this was prepared by the Centre of Problems Analysis and Public Management Planning in 2011. +Experts analysed the reports of the Department of the Interior and Rosstat, criminology literature and open data from portals on weapons. +The overwhelming majority of illegal weapons, according to the researchers, comes from the military and security forces. +"Half of all arms on the black market are there because of officials, whose work is connected with weapons," states the report. +According to researchers' data, 17% of the time the weapons are received from armed conflict areas, 14% is theft during production, 5% is "black archaeology." +A sales consultant of one of the weapons stores, who wished to remain anonymous, asserts that the weapons found by "black" diggers are not being bought any more, because they're too old. +According to him, dealers go to the military warehouse for a new batch of goods. +One piece, for example a TT gun, can be bought from a warrant officer. +It is issued to him, then handed over through the fence. +"He takes it to the city and sells it for 900 euros a piece with two magazines," he says. +"The truth is that the police are aware of everything, that is why periodically, when the crime detection rate is low, they conduct test purchases from illegal weapons merchants," says the consultant. +"Like in a luxury store" +The buyer and seller often find each other through friends. +I looked at sites, blogs, till someone responded, offering me to go to Begovaya station, where a man will be waiting for me to take me to the "corner" so we can negotiate. +I found out the price of the weapon only there. +military commentator Viktor Baranets +To get a weapon, I need someone with connections, says the sales consultant. - I have an acquaintance, but I'm not sure it's reliable. +There are salesmen on labour markets, but one needs to "come" there conditionally "from John Doe, who asked to tell that his daughter lost a tooth."
+Right now, even if I need a few knuckledusters, I get them through someone I trust. +He also supplies them only to me, because he knows that I won't give him away. +Beginners look for weapons in different ways. +Former military man Viktor Baranets tried himself as a buyer of illegal weapons in the mid-1990's, when he was preparing to publish an article about this. +The formulas are still the same, according to him. +He was given an album of pictures with "anything and everything." +"I felt I was in a luxury store," he recalls. +According to Baranets, the buyer is not offered a pig in a poke - you try out everything. +I, the potential client, am not just buying; we go to the forest with the seller and set a target there. +"I am given the opportunity to shoot, and when I am certain that the weapon is good, we begin to negotiate," describes the expert. +Store on a sofa +Internet searches lead to sites and "Vkontakte" groups, where weapons "for different purposes" are on offer. +No documents or personal meetings are needed. +"It's enough to have a certain sum of money," says the advertisement heading on the website "Buy a pistol or rifle." +Users leave their requests and ask questions. +Can a minor buy one? +"Without a license, of course," asks user "John" (name is changed). +"Want to buy a TT, Moscow," concisely requests "Fedorenkov." +The Federal Security Service now spreads a big network of fake sites and there are tons of potential buyers of military weapons. +People come like hungry fish to bait, and then mine coal in Siberia. +military commentator and former military man Viktor Baranets +I heard about this: normally the site is registered outside the area of applicability of the laws of Russia. +People accept orders. +The buyer pays at an ATM. +"In response, a message is sent with instructions on where the weapon is hidden," says Press Secretary of the Rights to Weapons organization Dmitry Kislov. +Viktor Baranets confirms that after leaving a request on the site you can stay without a weapon and go to jail. +The Federal Security Service now spreads a big network of fake sites and there are tons of potential buyers of military weapons. +"People come like hungry fish after bait, and then end up mining coal in Siberia," he says. +A Makarov for 100 dollars +When buying illegal firearms, 100 to 900 dollars is enough according to experts. +According to Dmitry Kislov from the Rights to Weapons organization, a Makarov gun can be acquired for 100-300 dollars. +The wait time is a month to a month and a half. +It is shipped from long-term storage warehouses by the mid-level management of these warehouses. +According to official statistics of the authorities, the number of such crimes in Russia on the whole dropped 7% as compared to January-October of the previous year, amounting to 22,900, while the number of cases of theft and extortion of weapons, ammunition, explosive substances and devices dropped by 7.8%. +Fast-food and supermarket workers are on strike in the U.S.A. +Up to a fourth of all American teenagers have worked the register at McDonald's at one time or another +In the last few days, there has been a wave of protest actions in the U.S.A. against low salaries in supermarkets of the Walmart chain and popular fast food chain restaurants like McDonald's, Burger King, Taco Bell, Wendy's and Kentucky Fried Chicken. +Right now, nobody is able to predict whether this wave will turn into the ninth wave or whether it is destined to fizzle out early. +Actions are being supported by unions and a series of left-wing organizations.
+In addition to increasing the wages received by employees of Walmart and fast food chains, the goal of the protesters is to create unions within them. +This sector of the economy is not covered by any union movement yet. +A few cents a year? +Actions began last week after Thanksgiving, on Black Friday, when massive sales drew millions of people in America, sometimes accompanied by clashes. +On this day, some employees of the Walmart corporation, which employs 2.2 million people around the world, left their workplaces and picketed together with unions and left-wing activists outside the stores that sell products to people on low-to-medium incomes. +Walmart sells everything imaginable, from hunting rifles and car batteries, to vacuum cleaners, eggs and milk. +Products in its stores are on average 8% to 27% cheaper than in major supermarkets. +So many low-paid Walmart employees shop only at their workplace. +Availability and assortment made Walmart one of the biggest American corporations. +According to critics, Walmart can afford to sell the products cheaply partly because it pays little to its employees. +These latter also complain about hard work conditions, for example lack of lift trucks and hand-held scanners. +Protesters on Black Friday demanded a salary increase and complained that the cost of medical insurance provided by the corporation went from 30 to 100 dollars a month. +A typical Walmart employee, on such an hourly wage, cannot afford this. +Scientists from the Berkeley University in California argue that if Walmart raises the average salary to 12 dollars/hour, it will cost the corporation 3.2 billion dollars. +This is about 1.1% more than it spends on salaries right now. +If Walmart fully shifts the cost of increasing wages to the shoulders of consumers, each visit to the store will cost only 46 cents more. +In one year, they would only spend 12.39 dollars more than now. +Walmart supporters happily note that the protests took place in only nine states and did not cause any damage at all to the corporation. +Black Friday continued in its stores from 8 in the evening on Thursday till midnight the next day, and during the period Walmart sold about 5000 products a second. +In total, its cash registers conducted nearly 100 million transactions on Black Friday. +Representative of the corporation, Dan Fogelman, asserted in an interview with a left-wing site, the Huffington Post, that a total of "less than five" Walmart employees left the workplace, and the protest act was just "another PR trick" of the union that organized it. +"Free cash register!" +Protests continued this week in New York, where their object was not Walmart (they're not so welcome in the progressive city, that is why they don't exist here yet), but McDonald's and other cheap restaurants. +McDonald's says that it sells billions of portions, and despite this it doesn't even give you sick days or pay you for honest work! +Jumaane Williams, member of the City Council of New York +At the moment, the minimum salary according to federal and NY law is 7.25 dollars an hour. +Fast food chains increase it with time, but very little. On average their ordinary employees in New York earn 8.90 dollars/hour. +Nobody earns less in this expensive city. +I cannot understand how one can survive in New York on this money. +Once upon a time, almost a fourth of American teenagers went through McDonald's, working part-time after school and living with their parents. +Few saw this as a source of living or planned to stay there for long. +I continuously come across interviews with McDonald's employees, who complain that they have to survive on this salary and sometimes even feed their children.
+On the other hand, there is a comment on the Wall Street Journal forum, whose author notes that it is irresponsible to have children if you do not know how you will feed them.
+Participants of the protest that began at 6.30 a.m. on Thursday near the McDonald's on 40th Street and Madison Avenue demanded that cashiers and cooks of the fast food chain be paid at least 15 dollars/hour, i.e. more than double their present wages.
+They also demanded the creation of unions in the fast food industry.
+American law prohibits the administration from preventing this or punishing activists of the union movement by nagging or firing them.
+On the other hand, the administration does not often make their life easier.
+But for objective reasons it is hard to cover fast food with a union movement.
+One of them is the unusually high turnover of employees.
+Disagreeing
+Noisy protests began on this day in a number of other cheap restaurants in Manhattan.
+The highlight of the action was the afternoon meeting near McDonald's by Times Square, where several local democratic politicians spoke out. One of them, Jumaane Williams, said: "McDonald's claims it sells billions of portions, and despite this it doesn't even give you sick days or pay you for honest work!"
+Demonstrators were supported by prominent NY democrats, like Bill de Blasio, a candidate for NY city mayor, who said: "We need to voice our joint support for the fast food employees, so that they can achieve fair wages and economic wellbeing, which every New Yorker deserves!"
+According to the New York Times, this was the biggest action of this kind in the history of the American fast food industry.
+But only a few hundred people took part in it, and many of them were not fast food employees, who comprise tens of thousands of people in New York.
+It is unclear right now whether this will spark a mass movement.
+"For the moment, the mind cannot be deceived too well"
+Among modern technology fans, a popular topic lately is augmented reality, seen primarily through the prism of special glasses.
+At first, a functional model was shown by Google in the summer, at its annual conference. Then, in November, it was announced that Microsoft had filed an application for a patent too.
+However, judging by a conversation with the leader of the group of interactive 3D technologies at the Cambridge laboratory of Microsoft, Shahram Izadi, glasses are a thing of the past for scientists in this company.
+They are drawn by the prospect of manipulating virtual objects in the air with bare hands, and of creating virtual open spaces.
+- Please tell us, in simple terms, about the work your research group does.
+- We work on the interaction of people with machines, at the same time trying to expand the boundaries of this interaction.
+So far, people in general are stuck at working with pixels on a flat screen and sometimes pointing at them.
+We want to look 5-10 years into the future and predict cardinal changes in this interaction.
+For example, the Xbox and Kinect sensors are a step forward. Almost no Xbox is sold without Kinect today, because everyone likes control by gestures.
+- What awaits us in the future?
+- Despite the fact that Kinect shifted the interaction to the physical level, much still occurs on a flat screen, sometimes in 3D.
+Information entry has improved (the system receives more data), but output still needs to get better.
+We are working on truly three-dimensional display systems based on various technologies, including projection technologies.
+We need to let the computer world into our physical world, to make it more tangible.
+But for this we need to identify both the user and the space around him.
+Then we will be able to supplement the real world with virtual objects in a much more convenient form.
+Above all, to get rid of these stupid virtual reality helmets!
+- What do you think about voice control?
+It is a popular thing, but is it overestimated?
+- It clearly cannot be called a cure-for-all - there's a question of privacy, because we do not always want to let others know about our actions and intentions.
+In reality, all types of interaction with computers are good, but each in their own niche.
+For example, we had a project to control devices in public places, in which we thought about movements, not wide movements, but small, reserved ones.
+Movements were not recorded by a camera, but by a hand bracelet that determined the movement of bones and muscles.
+It's big right now, but in theory it can be reduced to the size of a wristwatch.
+In general, the future lies in mixed control, e.g. movement + voice.
+- What do you mean?
+- For example, how would you ask me to give you this bottle of water?
+You will talk and show at the same time.
+- Usually I would just say it.
+- Oh, that will be very hard to detect.
+- So you want to make the users adapt to what the machine can or cannot do at that moment?
+- Not necessarily, but it is a mutual approximation.
+I think in the near future, we will mainly work on developing new sensors that will enable precise determination of a person's reaction.
+This could be, e.g., laser sensors. They have a decent depth resolution, which is very important.
+- If we talk about your work with the Xbox Kinect sensors, what are your complaints about modern cameras?
+Not enough resolution, depth or something else?
+- The current generation is what we can base ourselves on in working on recognition.
+Of course, it would be good to have eight megapixels with a 1000 k/s speed.
+It's not just the megapixels, though, but the quality of the matrix and the depth.
+From the latter point of view, all current technologies are not good enough for us - this adds work to the algorithm designers.
+So it's important to remember about the resolution on the X and Y axes, but also the Z axis.
+Speed, the number of images per second, is also very important.
+Human movements are relatively dynamic, and the current 30 k/s is really not enough, especially for gestures.
+Steven Bathiche from our laboratory created a touch sensor with a regulated delay from 1 to 100 ms, while modern serial sensors are closer to the latter indicator (60-100 ms).
+Not everyone understands how this affects the interaction between man and machine.
+In my work, it would be very useful to have a device that does not require touching and would have more images per second.
+- Does the number of cameras need to be increased?
+- In Kinect there are three cameras now, one of which is actually an infrared emitter and the second one the recipient of its signal.
+The third camera is a sensor of the visible range.
+It is not applied to determine the object's depth.
+Potentially, a large number of cameras could solve the problem...
+Or make it worse, by increasing the required volume of calculations.
+It would be nice to create a flexible analogue of Kinect, play with the flexion of camera disposition and see how this helps in the three-dimensional determination of the position.
+- As far as I remember, Microsoft did not present its glasses to the public, unlike Google.
+Don't you think this is one of the most promising platforms from the point of view of the everyday use of augmented reality technologies?
+- Certainly it is not very convenient to walk around with a smart phone in your hand all the time, but I think that the coolest option would be augmented reality, where you could shift from glasses to smart phone, projection display, and everywhere else, all based on a cloud platform.
+Glasses are a very personal device, and that is their strength (private things are seen only by you) and, at the same time, their weakness - augmented reality based on glasses will not allow you to work on virtual objects together with other people.
+- Let us imagine for a minute that manipulation of virtual holographic objects in the air is available not only to Tony Stark from Ironman, but to a regular person.
+There is one problem with this idea that the critics often point out: no tactile feedback!
+Hands feel nothing!
+What answers is your group preparing to this challenge?
+- In my lectures I often say that augmented reality is the seventh consecutive attempt at the interaction between man and machine.
+I think that the eighth will probably be the addition of tactile sensations.
+For now, one of the interesting tricks is to use the second hand as a sort of matrix for the image.
+It is great at registering pushes!
+But there are technologies that are really aimed at giving these "images in the air" a sense of tangibility, for example, the interference of several targeted ultrasound rays in the specific point where the finger is located gives a sensation, though a weak one right now, as if someone blew on your fingertip.
+There are also wrist bracelets that affect the nerve endings in the fingers, which is also a promising area.
+- Have you tried to deceive the mind?
+To force it to think that it feels something that it should be feeling when it sees something?
+- This is a good idea and we haven't tried this yet.
+It conceals one challenge that will not be solved so quickly - how to force a person, who is physically in a very limited space, to believe that he is walking along an open, almost limitless space; we are working on the concept of treadmills (not at all like those in fitness clubs), moving platforms, and giant balloons.
+So far, deceiving the mind has had limited success; there's work for many years to come.
+That's what makes working on virtual reality so attractive to researchers - many things are in their very beginnings.
+Judgement calls instead of culture - Rosbalt.ru
+Rosbalt continues the project St. Petersburg Avant-garde, dedicated to residents who are ahead, in the avant-garde of culture and art.
+This top list already includes outstanding figures of the art scene of St. Petersburg, whose achievements reach beyond the scope of the city and are often recognized in Europe, bypassing fame in Russia.
+The new player in the Rosbalt project is the bold artist Kirill Miller.
+The whole city knows Kirill Miller, a man dressed all in red, who can be seen by the Russian Museum, or by the Summer Garden, or at fashionable parties and shows.
+Kirill Miller's works always bring in crowds of people, no matter where they are exhibited.
+Kirill Miller is one of the purely St. Petersburg social and philosophical storytellers and creators of new mythology.
+Kirill Miller is an outstanding man of the Petersburg avant-garde of the late 80's and early 90's.
+Moreover, he is a city man, who makes people smile on the street and lifts everyone's spirit.
+Recently he took up the street organ and became St. Petersburg's music man, because he was ready for this complex role with all his Bohemian existence, philosophy and image.
+- Kirill, why do you walk around the city all in red, not yellow or turquoise, for example?
+- chose the colour red as a fashion designer engaged in look and image. +In this world, red is a compromise between artist, image-maker and society. +Although in society, everything that is not grey causes aggression and agitation of bad kind. +But my provocations are aimed at starting conversation. +The whole history of my actions is an invitation to discussion. +- When did you realise that you must be an artist? +- At an exhibition in the Nevsky House of Culture, where my was displayed. +It became clear to me that this is my path. +Then, the wave of older free, unofficial artists was gone, while new, free artists like me were not understood. +I became friends with the artists of the new wave, with post-Gaza-Nevsky ("post-gazonevschina"), which led to Pushkinskaya-10, and the wave was no longer. +I'm drawn to theatre, clothing, music, all genres except literature. +- And all this been united in your Art-clinic... - It was important for me to find myself in the centre of the culture of St. Petersburg, where all the best creative forces should come together. +In 1995, I occupied the territory on Pushkinskaya-10, and while the renovation work had not started, there was a musical and creative club, Bohemian club, the house of the St. Petersburg Bohemia. +Many were born there: NOMy, Tequila Jazz, I remember when Shnur was brought there with the Van Gogh's Ear project. +Shnur and his friends lip sang songs, wearing tight leotards, and the now trendy composer Igor Vdovin was with them. +When the group to play live, it became Leningrad. +Trakhtenberg was the presenter of many programs before Hali-Gali times. +We gave them Trakhtenberg, and a great career was on its way, but the basic education and mentoring he us. +Gallery D 137, Griboyedov - all these echo the Art-clinic. +That is where our staff and regular customers left for. +I am hero of the last century, when culture meant +In 2000, there was poll in the press, for the People Our City prize. +I was nominated Artist of the Year, my climax to an end. +In the new times, it is uncomfortable to work by old rules. I'm a man of truth, honesty and culture of the last century. +In our time, it is easy to become popular, but culture and popularity are different. You can popular, but not very cultural. +- Your work is marked by a recognizable style. +- Many of my works are hits, with clearly reflected relevance and acuity. +I will have a programme "Russian museum in clowns." +Clowns are a timeless category. +I was social before, now it is painful and scary to be like that. +But everything is blurred in clowns, tragedy is removed. +I like grotesque, I have grotesque ideas. +For example, saving the world by totalitarian changing of clothes by order. +Nowadays, people are judged by appearance, not their inner qualities. +knows, maybe you cannot shake his hand, and need to spit in his face. +And the lie will go away with the help of changing clothes. +- Recently we saw you in the role of music man. - A cultural city should have such a character. +fits the role better than I? +- Maybe commercial art can also be beautiful? +- Nowadays, commercial art should be neat, considerate, sweet. +There is a disintegration of cultures. +People used to get together in flocks, Bohemians liked one thing, the simple people, something else. +Now, everybody is divided into micro societies, it's hard to be liked by everyone. +I am not a hundred dollar bill to please all. +Now you have to think who you will please. +Now, each cult hero has 100 fans. 
+- But several thousand come to Stas Mikhailov! +- The cast-outs to see him, the sexual-social sphere is at work there. +300 people will come for culture, not 10,000. In the end, there's less management, money, everything dies out. +I have fans; the main thing is not to betray them, not to spoil what I have earned. +In my youth, I painted such art that one collector had it hanging on the same wall with Falk and Larionov. +I started with paintings, people usually end with. +Concepts are often mixed up these days. +People say: culture, consumer culture. +There is no culture in consumerism, it's "from another opera." +I am a man of yesterday's culture. I grew up on of who lived poor and died in poverty, refused money for the sake of painting. +is the culture I'm for. +- Kirill, what is St. Petersburg missing? +Good cultural experts. +There is such a thing: an official for culture. +But not everyone can be in culture. +Under the right rulers everything was different. Kings may not understood culture very well, but understood that they needed to stick with the right experts. +There are good consultants in Moscow right now. +Here in Petersburg, there are people who could be experts, but they are pushed to the side, because more advanced experts needed, who will correctly evaluate these experts and give way to them. +Judgement calls are what thrive now. +Even Erart, but they're different because they say honestly that don't accept all modern art. There are artists, who need to find other museums for themselves. +- What does St. Petersburg mean to you? +- St. Petersburg is not a cultural capital, Moscow has much more culture, there is there. +It's hard for art to grow on our rocks. +We need cultural but we now have more writers readers. This is wrong. +In Europe, there are many people, who go art exhibits, concerts. +Here, this layer is thin. +We need to art as it was in the beginning of last century. +is supported by the St. Petersburg grant. +Give birth in space +The earth is in danger. +Global warming or encounter with a killer asteroid. +Caravans of cosmic ships with humans on board leave in search of replacement planet. +To save humanity, the question is how to propagate our race in conditions of weightlessness or on that replacement planet? +I the choice is small. +There are only two actual planets that can be explored even hypothetically. +"Venus and Mars," says Senior Researcher of the P.K. Shternberg State Astronomy Institute (GAISh) Vladimir Surdin. +But while conditions on Mars are more appropriate for life, Venus has temperatures. +Life is possible only at a high altitude or on the orbit of Venus... in space. +The question of reproduction in space began with flora. +Half a century ago, were run on plants. +Four generations of grown in orbit were no different from their earth counterparts. +Then, insects were bred in orbit, small flies. +In 1979, quail eggs were sent to space, to check how an embryo develops in weightlessness. +We get an absolutely chick. +But then the problem begins. +"The problem is related to the fact that this chick needs to find support, needs to get on its feet and start moving," explains head of the laboratory of the Institute of Medical and Biological Problems (IMBP) RAN Vladimir Sychev. +Having found no support, chicks were tumbling around in disorder. +After 10 hours, the newborns experienced complete atrophy of instincts. +Chicks did not react to light and sound. +And the problem was that they simply died after four days. 
+"We bred chicks twice there, and then stopped, because impossible to work with them there," says Vladimir Sychev, confirming the failure of the experiment with chicks space. +last biological "mini-ark" with animals flew into orbit 16 years ago. +In spring 2013, experiments will continue. +However, same-sex beings will be on the Bion bio-satellite. +There was an experiment with rats, who were to space with foetus. +In principle, there was nothing extraordinary there. +"This was bio-satellites, but again, it was a singular experiment and such research needs to be conducted," says Vladimir Sychev. +landing, the cosmic rats had babies. +But it's hard to solve the problem of reproduction directly in space. +It's not an easy task. +Animals simply cannot follow their sexual they're of their familiar environment. +In principle, people, unlike animals, can. +Homo sapiens have abstract thinking, and are able to create a fitting emotional background. +Such experiments are not conducted for ethical reasons. +But women have been flying to space for 50 years. +The biggest risk was for Tereshkova. +The most valuable thing for humanity is the female body. +Our "Seagull" left nobody on earth could tell whether she would be OK after flying to space. +Whether she will be able to give birth after this flight. +"Nobody answered this question," says rocket and space industry veteran, Vakhtang Vachnadze. +June 1964, only a year after flying to space, the first woman in space Valentina Tereshkova gave birth to a daughter. +The child's father, Nikolaev, was also a cosmonaut. +In 1988, the second woman cosmonaut, Svetlana Savitskaya, who went into twice and even worked in open space, gave birth to son. +However, the remains. +We have few, very few cosmonauts, who were OK and had healthy children after long flights. +"What's more, it is dangerous even for orbital flights," adds pilot and cosmonaut, Hero of the USSR, Hero of Valery Poliakov. +And yet, humanity needs to seek out some new avenues in biotechnologies, protection from radiation, creation of artificial gravity. +Hydro-laboratory of CPK - mandatory phase of training for a flight. +Here, cosmonauts practice skills of working in open space in conditions. +Water imitates weightlessness. +If for adults water is a foreign medium, although comfortable, for infants it is a native element. +Small amphibians seem to confirm that life came to land from the ocean. +There is a connection with the fact that an infant spends about 9 months in amniotic fluid in the womb; is easier to get used to after that. +In principle, it is logical, because only two weeks pass from birth until the first bathing. +"This is very little time to forget something," says infant swimming instructor Marina Aksenova. +In other words, if for a newborn weightlessness is more natural, a woman needs gravity, earth's pull. +Stomach and pelvic muscles usually quickly degenerate in the ability to push out the embryo is reduced. +Well, let's assume that childbirth stimulators will work out. +Maybe she will push out the baby in a special room. +"Then - asks Valery Poliakov about this non-trivial issue. +On other hand, a baby also needs artificial gravity. +When a body does not feel the earth's pull, it does not form the skeletal and muscular system. +is not to dress a newborn in orbit into a special loading suit for training, as they do with adults. +He will simply not have what he needs to survive. 
+"And this experiment, that we will go for by the birth of a child in a foreign environment, will lead to us bringing a handicapped, completely unadapted human to earth," predicts Chairman of the Committee on Bioethics IMBP RAN Igor Pestov. +For the moment, birth of children in space is just a theory. +However, with time, it will become reality, when earthlings will go to a faraway planet in their ships, and it will become the home for their offspring, who were born in space. +Head: Svarc System audit has failed because of politicians. +The Czech Republic has sound bodies and a good standard legislation when it comes to public contracts, but it lags behind in their application. +This was said by Miloslav Kala, vice-president of the Supreme Audit Office (NKU) in an interview for Aktualne.cz. +"The Law will never be perfect, but its application should be - this is we are missing, in my opinion," states Kala, commenting on the current situation. +Similar conclusions are also reached by the joint audit from the Czech and German auditors. +As an example of practice, they cite Petr Necas's approach to the so-called "Svarc System." +The Prime Minister recently claimed that the ODS will not be burdening business owners with its checks - so is it forbidden or allowed? +"The Law must out one way or the other and if it prohibits something, then even the Government's head cannot prevent the work of its department, which is duty-bound to monitor and enforce," asserts Kala. +At the audit committee's session in the House of Deputies, you about a joint project between the Czech Republic and Germany, within which legislation relating to public contracts in both countries was compared. +What exactly was this about? +This is parallel auditing, which we began around two years ago. +Simply put, this is about European legislation governs the handling of public contracts, followed by state legislations and then the actual practice itself. +We brought all this and although the audit is not yet complete, some very interesting differences have become apparent - in general terms, our legislation might be even "more concise and complete," however the actual practice is in certain aspects better in Germany. +This confirms that creating more and more concise rules is not enough, and that attention must be paid to the actual application of these laws. +What does this project actually help you with, and what do you think its outcome will bring? +This kind of joint audit could contribute to curtailing efforts to specify our law, to reduce and perfect boundaries, when it does not have such a positive impact. +Economy means acquiring the required thing at a reasonable (which does not always mean the lowest) price, so that profiteering and possible criminal proceedings may be avoided. +However, just because we have reduced the order limits, does not mean something will be procured. +The system might become overloaded with the amount of paperwork, and those, who wish to look for loopholes in it, will be able to take advantage far more easily than if the limits had higher. +These are domestic problems about the implementation of legislation relating to public contracts. +How does the audit system work in Germany? +there an office like the NKU, or is it organised differently? +As far as the office is concerned, the functions like our NKU, and it is organised like ours, it also has a committee although it is appointed slightly differently, but basically both offices operate similarly. 
+Powers are also similar to a degree, though of course Germany is organised federally, so these courts of auditors are also at the member state levels in this respect their system slightly differs from our own. +The BRH can only audit federal money, known to us as state funds. +Public funds, which, for us, are administered by regional and municipal authorities, are audited by the federal courts of auditors there. +When it comes to their legislation, is it more straightforward than ours? +Overall, I would not like to make a comparison without any specific data, nevertheless in certain respects Germany serves as an example, but it certainly cannot be said it is better in every aspect. +Is this perhaps, they have better enforcement? +That is certainly not true, but again, I prefer not to make such comparisons. +It should be said that even in a country we perceive as exemplary, they encounter a range of problems. +If that were not the case, they would gain nothing working with our office, would they? +Coming to domestic legislation, what did the amendment to public contracts legislation mean for your office, its impact being already? +The period the amendment came into force has quite short, so it has not manifested itself in our audit work yet. +Since we carry out audits ex-post, a certain delay has be taken into account. +As yet, we have only observed it within the process of preparing future audits - we have launched our new "fiscal failure risk detection" system, with which we have processed almost 14 thousand public contracts, and these have been analysed - that is where changes will clearly be seen, because of the changed limits, the adjusted conditions governing certain types of selection processes, and so on. +So do you see the adoption of a benefit, or rather as another burden on the bureaucratic system? +I this legislation is a step in the right direction, and I hope this will be confirmed. +A problem, which may arise here, is that the law becomes "too constrained" and will not be enforceable. +Under previous rules, parties being audited were already by their audit provider (for example, in the case of regional operational programmes, the regional office) to the fact that every infringement of public contracts law means a breach of budgetary discipline. +it worth constraining law in this in that case? +I do not think this the way. +The system those who want to attack and abuse it, but not penalise those, who make a mistake on a technicality, which does not affect the final decision. +This kind of system will only increase pressure on bureaucracy. +So how can we get of this? +Let's see where this takes us. +The Prime Minister recently said the ODS not be burdening businessmen with audits of the so-called "Svarc System" - what does this mean? +Is the Svarc System or allowed? +Law must be set out one way or the other, if it prohibits something, then even the Government's head cannot prevent the work of its department, which is duty-bound to monitor and enforce. +He may "Let us change this law and it," but he cannot say we should pretend it is not there. +The law on contracts has relatively strict rules about the formalities which must be adhered to - which is right way to ensure public tenders are protected. +On other hand, it a tragedy, when a bidder with the best offer is excluded on a technicality. +The Law will never be perfect, but its application should be just - this what we are missing, in my opinion. +Roads are icy in places, but mostly passable. 
+In several places in the Czech Republic, the main roads are icy and snowy. +However, the majority of roads are passable, extra care needed in places. +Carlsbad region +In the Carlsbad region, roads have been usable morning, though in some places they were icy and snowy. +The temperature has dropped to between five and ten zero, though it is expected to get warm slightly during the day. +Snowing in the region stopped, and a thin layer of snow remains in the lowlands. +However, the ridges of the Krusne Mountains have around 30 centimetres of snow. +In some locations there is limited visibility due to mist, according to the local highway service. +The R6 high-speed motorway and primary roads in the region are now usable without restriction. +Caution is, course, for example, on certain bridges, where the surface can be icy and slippery. +All secondary and tertiary roads are also passable, including mountain roads. +In certain stretches of these roads there might be remaining frozen and compacted snow patches. +all, at higher levels, extra care should taken while driving. +and Hradec Kralove region +On some roads in Eastern Bohemia, there might be a risk of black ice, at higher altitudes and in the there might be a layer of compacted snow, to the Road and Motorway Directorate. +The highway service is warning the drivers against black ice, which might occur at higher altitudes of the region in particular. +Black ice may occur around Lanskroun, Usti Orlici, Policky, Svitavy, Vysoke Myto, particularly secondary and tertiary roads. +The I/43 and I/34 roads have been chemically treated around Svitavy. +Snow is affecting the roads in Krkonose and Orlicke mountains. +At higher altitudes, a compacted snow layer on the roads around Rychnov nad Kneznou and Trutnov. +In Eastern Bohemia the day will be mostly clear partly cloudy, and dry. +Temperatures will be between minus three and plus one degree Celsius mostly, with a wind. +Pilsen region +The roads in the Pilsen region have been usable this morning, with extra care needed in some places. should take the weather conditions into account. +The morning will be frosty, with ranging between three and degrees below zero. +Due to the existing snow and subsequent drop in temperature, certain roads may be icy. +Drivers should expect mist in places, though visibility will gradually improve. +This information was reported by the highway service. +The is drivable almost without but the road services recommend caution between the 80th and 131st kilometre marks. +Most primary road surfaces dry and frost-free. +Southern areas the Pilsen and Tachov regions may have icy patches. +Secondary and tertiary roads are wet, and may therefore also have icy patches. +Drivers should be cautious especially on less frequented roads in the Bohemian Forest. +region +Drivers should expect snow slush on the roads if heading for the higher parts of the Olomouc region. +It is a result of the chemical treatment carried out at Cervenohorkse sedlo and on the way to Videlsky Kriz. +Snowploughs were brought out by falling overnight, the Sumperk region, to highway maintenance, got around three centimetres of snow. +other parts of the region, roads are mainly without restrictions. +"In the region, traces of snow have remained at highest altitudes. +Drivers should expect snow slush at Cervenohorske in the direction of Jesenik," the dispatch officer for the Sumperk highway service told CTK today. 
+Their Jesenik counterparts also made an outing overnight; the roads all the way to the highest altitudes are now and wet following the chemical treatment, according +The Olomouc region's roads are usable without restriction, while in the area of Sternberk drivers should in wooded areas, where roads have remained wet. +Usti nad Labem region, Liberec region +Since morning, the snowploughs reported several places, which are difficult to pass in northern Bohemia. +Besides certain snow-covered places, or some icy frost patches, the mountain road from Telnice to Kninice in the Usti nad Labem region is also closed, according the police +Temperatures remain below zero and roads are likely to remain snowy icy. In the lowlands, however, particularly southeast of the Bohemian Uplands, there are no problems and roads are mostly dry. +No traffic hold-ups have so far been reported. +Icy frost patches have been reported in particular by road maintenance around Steti. +According to meteorologists the conditions for this were perfect rain and melting snow during the day, with a clear night and freezing temperatures. +Adverse conditions are expected on the main sections of the I/13 road between the Usti nad Labem and Liberec regions. +The closure of the Telnice Kninice road was caused by bent tree branches, which were down to road level by snowfall. +Simon Ornest: At the concerts we want a fusion of positive energy +What is your opinion on the end of the that come in less than a month? +It is just another startler, which we like to latch on to. +Together with The Tap Tap band, we tend to joke about it, saying that we might be the only band on earth that could draw enough positive to hold or avert the end of the world completely. +In December you are even organising a unique series of three concerts against the end of the world. +Can you give our readers some details on this? +This is a nationwide fund-raising event, which we have been planning for the past two years. +We decided to make use of the marketing potential of the end of the Mayan calendar, due on the 21st of December 11:10 a.m. +On the eve, the 20th of December, at 9pm, 3 concerts will take place in parallel in Prague, Brno, and Ostrava. +They will end at around the time when Kiribati Island in the Pacific, which is 12 hours ahead of us, reaches the end of the Mayan calendar. +Who came up with this idea? +Initially it was probably my idea, later we worked all the details out with our designer, Honza Augusta. +from the fact that we to collect enough positive energy stop the end of the world, we also want to allow ourselves and the public to spare some thoughts for the state of our planet, we, day, hand it over to our children. +On the occasion of the end of the Mayan calendar, we have also prepared a range of unique items, shoes, t-shirts, bags, and original keys against the end of world, which can be purchased at www.e-tap.cz to support our cause. +The Tap Tap band, together with other artists, also recorded the so-called anthem against the end of the world, called "The End of the World is cancelled." +It is already well received on YouTube, will it figure at the fund-raising concerts? +Of course, for the grand finale, as long as the world does not end beforehand. +It will be sung by all the artists at all the three concerts at the same time. +The anthem will also be featured in a unique live broadcast on Czech Television. 
+words were written and the role of Jesus in the video was played by Hanak, Xindl X also sings in +How did you end up working with them? +We collaborate also with other personalities of the Czech cultural scene, due to organising a lot of fund-raising events and concerts... +We try really get them involved in these projects. +turns that most of them are interested and enjoy working with us. +What will the proceeds from the concert against the end of the world go to? +Equipping the wheelchair-accessible educational Studeo centre, which is already in its sixth year, in collaboration with the citizens association Tap the Jedlicka Institute for the disabled. +Tutors come in regularly to spend with the Jedlicka Institute's students and run activities, which they enjoy and interest them. +The students themselves do not have the funds to afford tutors, so try provide this for them in this way. +Within the construction project at the Jedlicka Institute, a separate building is planned, which we can move into this project. +Every sees the appearance of several bands and artists. +How do you select them? +We have tried to compile a programme, which speaks for all ages, including children. +For example, in Prague, Chinaski, Support Lesbiens, Illustratosphere with Dan Barta, The Tap Tap, Marian Bango and Jiri Suchy will +Further details can be found at www.kpks.cz. +Are you planning any "bombastic events" in the future? +In May, we will be making our first appearance in the Prague Spring, so we will definitely be preparing a good line-up with some interesting guests. +Next year, we would like to play at the Czech National House in New York, and I we will be in the USA would like to build in appearances in Washington and Chicago. +Your international plans are not modest; you have already performed, for instance, in Madrid, Brussels, London, and Moscow. +The Tap is nonetheless a band composed of handicapped people. +How do you cope with these journeys in terms of logistics and organisation? +It is not as scary as seem at first. +We have five members in electric wheelchairs, which must be transported in the luggage area; we must also, of course, carry around with us a lot of luggage and instrument cases... +Nevertheless, we have so far managed it without any problems, CSA and British Airways were well prepared for us, so much so that, on occasion, I was quite surprised. +Even in which we have just returned from, it all went smoothly. +Thanks to these international trips, you will have had a chance to specific accessibility issues, public attitudes to disability and so on. +What have been your experiences so far? +After Madrid, Luxembourg, London and other places, where everything functions better than here, we have witnessed that in the East everything is still in beginnings. +Compared to Prague, Moscow is rather inaccessible; it still remains unusual there for a person in an electric wheelchair to be travelling around the city centre his or her own. +Obvious things, such as wheelchairs priority in are not commonplace there. +Fortunately, citizens associations are emerging there too that are trying draw attention to the problems faced by people with disabilities. +And on the other hand, where do we still lag behind more advanced countries? +are a lot of things, which still behind on... +It is important to mention that improvements to the current situation always depend on the efforts of the people who affected. 
+In London and Madrid it is completely natural for people with serious handicaps to be independently out in public, and they can use the toilets, go to the museum, or wherever... +It less common there for large groups of people with disabilities to actively part in social life, in this respect with The Tap Tap we are a ahead! +Public respect or accessibility is one thing, but it only when we can become famous athletes, artists, actors, politicians, or lawyers that things will begin to change. +So far there are exceptional cases, people who are strong-willed. +The Tap Tap band is currently very popular, but let us look back a few years, what prompted you in 1998 to form it? +I began my job as a tutor at Jedlicka Institute, where I was surrounded by a lot of young people, who were interested in doing something. +Since I am a musician myself - among others I play the saxophone - I started a music club with a colleague. +With time, as our moderator Ladya Angelovic says, it grown a little out of our control (laugh). +Your popularity has only come about in the last years, or am I mistaken? +It is true that we have been helped by creating ties to famous singers and also by our proactive work on promoting the band. +We realised that work, which goes on unseen can be like it never existed. +Thanks to funds from the European Union we can even afford top quality tutors, equipment and so on. +Was it your goal to take The Tap Tap to such heights? +From the outset, I felt there was potential to do things a little differently. +Show business is filled with where one the other. +It is logical in its own new things are in hesitantly and take a long time. +Things, which are unique, are few and far between, but I would dare to claim that Tap Tap is one of those things. +A person's first impression on seeing you is, of course, pity - it is a natural reaction... +But that pity is simply wasted, because handicapped people are not abandoned and suffering beings, need to be pitied. +They are people, who can fully live life and blossom, assuming, of course, that they have the right environment for it. +I say that a person with a succeeds in it is not just for them but for society as a whole. +Has your success also been helped by your firm hand as leader, as many people are suggesting? +If we want to achieve top class work, we must be uncompromising in many things and require a certain level of discipline. +I think this is to be expected. +Some people come to us with a romantic idea and their head clouds, and when they find out they have to go to twice a week, attend practice sessions and put up with a lot of time travelling to concerts, their enthusiasm quickly disappears. +That is how works everywhere, with every group that wants to work and wants to achieve something. +The Tap Tap band currently has twenty members. +How many of those were present at the beginning in 1998? +Only one, Ladya Angelovic. +We are an open people come and people go, this is unavoidable. +Those who have the the drive will always find our door open. +The event takes place the day before the end of the world is on Thursday 20.12.2012 from 9pm. +The venues will be Praha Incheba, Brno Fleda, and Ostrava Plynojem with performances from 12 bands and other musicians from the Czech Republic. 
+All three cities will joined by a televised link-up at the close for a united rendition of The Tap Tap's anthem "The End of the World is cancelled" +The concert's goal is to raise funds to equip the STUDEO multi-functional wheel-chair accessible centre at the Jedlicka Institute in Prague in the sum of 25 million Czech crowns. +Admission fee to the concert is CZK, children under 12 years of age go free, tickets on sale from Bohemiaticket. +Poland and the Cosmos. +Last week the council of ministers of the European Agency admitted Poland as the twentieth member of the agency, being the second nation from the former Eastern Block (after the Czech Republic, which became a fully fledged member of the ESA on the 12th of November 2008). +Poland began close cooperation with the ESA in 1994, and in the following years it has participated in a series of agency projects. +course, Poland's path to the space had begun much earlier. +Polish boffins devoted their time space flight even the Second World War, but were not always met with understanding. +I look back, for instance, to the lecture of A Sternfeld in Warsaw's astronomy observatory, who, on the 6th of December 1933, presented ideas on his pioneering work Entry into +The thoughts of the young engineer (born 1905) his audience cold, and years later Sternfeld remembered that Dr. Jan Gadomski had shown an interest in his work. +In 1934, his work Entry into space, Sternfeld received the Robert Esnault-Pelterie and Andre Louis Hirsch prize in France. +The above mentioned Jan Gadomski (1899 - 1966) later became a strong promoter of astronomy and astronautics. +He published hundreds of articles in Polish journals, and wrote a series of books on these scientific subjects. +Gadomski became a world-known promoter of astronautics his was, notably, recognised when a on the far side of the Moon was named after him. +In 1925, Poland had already built a handcar which was supposed to be fitted with a rocket engine. +Unfortunately, both the project's designer, and the project's details, are unknown. +It not even clear, whether the rocket was intended to start the handcar or to slow it down. +Information about this rail track is only known from press articles of the time. +In 1933 the Polish artillery started their engagement in flying bombs. +The research was undertaken by the Weapons Technology Division in collaboration with Prof. Mieczyslaw Wolfke and Prof. Gustaw Mokrzycki. +From the documents, it is clear that the research reached the stage of practical tests. +Of course, the advance of the German army interrupted the research. +In 1937, the concept of a photoelectric homing rocket designed by engineer Rohozinski appeared in the trade press, and in the following year The - air torpedo and flying rocket-bomb appeared, authored by Leliwy-Krywoblocki. +Both projects destined for military use of rocket engines. +Immediately prior to the War, all projects for military use of rocket technologies were overseen by Provisional Scientific Board (Tymczasowy Doradczo-Naukowy) that coordinated all the work. +The Board was appointed in 1937, but after two years of activity operations were ended the start of the War. +Further work devoted to astronautics appeared in the Polish Press after the War thanks to the Polish Astronautics Company (Polskie Towarzystwo Astronautyczne). +The first reference to the company figures in the November issue of the magazine Problems in 1954, in which four in-depth articles are on the subject of astronautics. +In one of these, by Prof. 
Subotowicz, the establishment of a company is proposed, which would dedicate itself to +At the time, there were already projects underway for artificial satellites and it was clear that cosmic research was an emerging sector. +From the beginning of 1956, the Polish Astronautics Company (PTA) sought entry to the International Astronautics Federation (est. 1951) by autumn the PTA was already full member. +In the following year, the PTA's first Kazimierz Zarankiewicz (1902 - 1959) was appointed Deputy Chairman for the International Astronautics +He served in this capacity until his death in 1959. +From 1956, the PTA played a significant role in successful development of meteorological rockets RM (Rakieta which became the first Polish rocket to enable scientific research. +The first RM-1 model was completed in 1957 and the first launch took on the 10th of October 1958. +The rocket, a ceiling of 1800 metres, measured around 80 cm in length and weighed a little under 5 kg. +Later, the improved RM-1A version was constructed and in the summer of 1959 launch tests were initiated for the two-stage RM-2 rocket in the Bledowsky Desert. +The rocket was 1.4 metres in length and weighed approximately 11.5 kg. +A further development model was designed for real scientific work - the RM-34 rocket was to reach 14.5 km and be tasked with monitoring high altitude winds. +Of course, in 1962 further research was stopped. +The successor to the RM rocket the Meteor-1 rocket, developed from 1962 to 1965. +The rocket was designed as a two-stage with a total length of 510 cm and a launch weight of 32.5 kg. +Three models were developed (designated Meteor-1A, -1B, and -1C), which differed in the room available for scientific apparatus. +In the Meteor-1A rocket, a space of 0.4 litres was available, Meteor-1B had 0.34 litres, and Meteor-1C had 0.62 litres. +The maximum altitude for all three models was 37km. +Between 1965 and 1968, the development of Meteor-2 was underway in the Aeronautics Institute, with its first launch tests in October 1970. +The Meteor-2 rocket had a launch weight of 380 kg, and was capable of lifting a useful of 10 kg a height of around +Subsequently built models were the Meteor-2H and Meteor-3. +Poland's admission to COSPAR (Committee for Space Research) in 1960 should be mentioned, as well as the appointment of a national COSPAR board two years later. +Poland also participated in the Interkosmos space for space research on artificial satellites, and in 1978, the Polish pilot Miroslaw Hermaszewski became the second intercosmonaut after Vladimir Remkov. +Abolishing the legislation on public works is not the solution. +Last week the Constitutional Court abolished the law public works. +The resolution caused public +It will certainly be interesting to look at this issue from a broader perspective. +Liberally financial systems in the EU, just as those in globalised world, are based on the principle of an unregulated economic competition. +Its effect means that individual financial entities and national economic systems are in a state of permanent conflict among themselves. +The is the principle of free trade and free, completely unregulated movement of private capital together with uncontrolled financial speculation. +Due to significant labour cost differences (salaries) there pressure on prices. 
+On this basis, it should be understood that when a supplier tries to compete in a commercial tender by importing cheap goods, "the rug is pulled" from under the competition's prices to capture a greater market share and, in this way, increase its own profits. +On a wider scale, this means most businesses must move production abroad, import cheaply from abroad, or close down. The result is high unemployment in countries where labour costs are high to other economies. +Since private capital is not bound by social responsibility, and therefore also not by the unemployment it causes, the social costs born by state must increase. +The whole situation is bolstered by the businessman's complete unwillingness to pay taxes, which would alleviate the economical and social harm caused the pursuit of profit. +The situation is so well known that there is no need for actual statistical data. +The ruthless private capital practices create particular economic situations, where the State these countries is forced to enter in mutual competition, aiming to artificially lower the social standard of its own citizens in order to attract foreign investment. +In other words, governments stake own because of private capital while disregarding the drop in social +This occurs chiefly in amendments to existing law. +The aim is to economically force the domestic population to prices dictated by private capital, especially in terms of salaries. +On one hand, this economic system of force, in case of long-term unemployment, on the other, restricted employee rights in the workplace. +This yields growing poverty and an increasing void between the poor and the rich. +Germany there are already a host of food hand-out centres for the poor, who are not able to feed themselves on their own wages. +The number of these people is already in the millions. +In name of improving the competitiveness of the German economy, it commonly occurs that properly employed people receive such a salary the State needs to top it up to the minimum wage. +Just such a scandal was revealed in the case of auxiliary staff in the Bundestag. +The measures for all the southern EU states will undoubtedly lead to the situation, where people are pressured by a catastrophic in living standards to emigrate as it was in the 19th century, or to eke out an existence on starvation wages on the edge society, in the hope that the country will eventually see some foreign investment. +At this point we have to ask where this may come from? +If it is to come from other EU states, then poverty is being shifted from one country another, or it will not come at all, because Chinese, Indian, Brazilian, Turkish, Moroccan, Egyptian, and African labour is still at a fraction of European wages. +This applies to all Latin America. +Liberal theory and the Media claim that the State may not participate with capital in its own economy, and that a controlled economy to economic ruin. +Private capital cruelly insists on the viewpoint that the State must not intervene in the economy. +Thereupon, we should ask ourselves whether private capital has no influence, whether it actually leads politics and thereby the country, for its own selfish ends. +Here, the answer must be yes. +The proof is the existence of the almost omnipotent, and in all states, omnipresent lobby. +The result is a desperate situation manifesting itself through corruption, through mutual benefits and legislation, where almost everything criminal, but nothing is punishable. 
+In Germany the situation is such that state ministries, through lack of financial resources, out the drafting of to private law firms, who are basically connected with industry. +These laws are then approved in the Bundestag. +Real power does not come from the people as the Western-style constitutions claim, but from strong financial organisations that look their own interests. +It is clear that liberally-orientated democracies will now quickly a situation, as described by Appian in his work on the Roman Republic Crisis in the time of Cesar and Pompei: "The State was already long in degeneration its offices taken by force. +With bribery, illegal acquisition of benefits, and with stones or swords. +Bribery and corruption were rife and unhindered, and the people would vote for a result which had been bought" ..."people with principles did not run for office, so on one occasion the whole debacle meant the state went eight months without consuls.." .."There was actually talk about the only answer to this situation being autocracy, and an energetic be elected." Appian had Pompei in mind, but it Cesar who changed democracy for autocracy permanently. +The conclusion, just in the current society is built on unscrupulous tendencies for personal gain without to the interests of society as a whole. +Private capital in its present state is not able to understand interests of society as a whole. +The outcome is now, as it was an unprecedented decadence of elite with no attempts whatsoever on deeper reaching reforms. +The causality of the rise of the fascist and communist regimes should therefore be sought in the misguided liberalisation of the economic in the 19th and 20th +The current state of affairs, when we consider the demise of those systems in favour of liberalised democracy as an interlude, can expect its next cycle. +The particularly reality is that the current elite is completely ignoring the potential lost of hundreds of thousands lives, humanitarian and social disasters, which we are already witnessing, as well as crimes against humanity, as we are familiar with from ancient and modern history. +abolition of the on public works is not the answer, at least not in the long term. +Under the pressure of economic competition, internationally as well as within Europe, the Government of the Czech Republic will be forced to pursue ways of lowering the population's living standards. +This pattern is thus systemic. +To address this, there are targeted political and social reforms, which strengthen the state's capital participation in the economy, increase the people's influence the state and weaken the monopoly held by private capital over society in favour of the state. +Chaos Lab. +"Nothing comes from and nothing ever could," from Sting's Fragile, where one of the main verses from the refrain is "Lest we forget how fragile we are." +"If sons did not want war, there would be none," said the dying Gutle Schnapper, wife of Mayer Amschel Rothschild in 1849. +The latest wave of violence between Israel and the Gaza as always, has sparked a lot of reaction. +Some stand by arguing it has the right to self-defence, and Palestinians portrayed as terrorists, others support the claiming racism by the state, claiming that genocide being committed against Palestinian Arabs, and that Israel is a terrorist state. 
+I do not want to dwell, in these repeated periodic waves of killing, on is the transgressor and who is the after all, today's inhabitants of Israel, including the self-governing territories, were born into the current political situation, and not live through the start the violence. +I would like to offer the readers a peek behind the scenes, a look at whom, of all, this 95-year long tension is serving (starting from Balfour's declaration in November 1917) on this small piece land in the Middle East. +Some my thoughts are supported by available historical facts, while others are derived from my understanding of who, that is, which group of people is the main source of events in modern history. +Human history is in the first instance about the struggle for power. +In every era we can find an Alexander the Great or a Napoleon. +What is not quite so apparent is whether these were the who had chosen path independently, or whether behind throne stood someone who directed their actions towards a pre-calculated goal. +We must accept that we live in a time when the world's wealth is concentrated into the hands of a few individuals, and that this concentration of wealth and the power it exudes could not in one generation's +Among these astronomically rich families, one stands out, which be considered the puppet master (whether someone else stands above them, I am unsure, but I would not rule it out) - the Rothschilds. +Not much about them. +Understandably. +The first news agency (Reuters) they bought in 90's of the 19th century, in order prevent their name being connected with acts of high criminality, which appeared in their background and which always securing power, increasing wealth, or both. +They hold majority stakes in almost every central bank in the and against countries, where they do not hold a stake, they are either waging or preparing for war (before the assault on Afghanistan it was 7 countries, after Iraq it was 5, after the overthrow of Kaddafi 4 remained, but in meantime Russia submitted its central bank to the Russian Government). +Whoever attempted to defy this family died. +Abraham Lincoln refused to renew the status of the central bank to the Rothschild Bank of America, and during the Civil War he began to issue his own (that is state-issued) money and was assassinated in 1865 at the theatre. +JFK his own money and wanted to close the Fed (Federal Reserve), and was killed in 1963, Congressman Louis McFadden was poisoned in 1936, after he had intended to the Fed causing the Great Depression 1929. +Their thirst for global power led in the years of 1859 - 1871 to the formulation of a three-world-war plan by the freemason leader of the 33rd degree, Pike. +The first war was to remove the large monarchic state bodies in Europe, the second was to remove colonial rule, especially from Great Britain, and the third will reduce the world's population to - 1 people (this number of slaves will suffice for comfort and luxury, and will not use up so many resources), the creation of one universal faith (ecumenism is just an appetiser for this solution), and finally the seizing of absolute power. +The method, which the group of wealthy families with the Rothschilds leading the way, is the instigation of followed by the offering of a solution (order ab chao - order from chaos). +These solutions are false, and always lead to a worse situation (vide establishment of so that the crisis of 1907 would not be repeated). 
+Thus, having succeeded in assassinating Ferdinand, the Habsburg heir to the Austro-Hungarian throne, in Sarajevo thereby unleashing World War they destroyed tsarist with the Bolshevik revolution. +The First World War ended abruptly, militarily and with capitulation (the was no longer needed to destroy tsarist Russia) and the central European powers of Austria-Hungary were subsequently dismantled. +To facilitate the inception of the Second World War, they allowed bankers and politicians to create a latent conflict situation by saddling Germany with huge war reparations, thereby making a radicalist example of the impoverished masses, it remained only introduce a sufficiently convincing culprit and a leader with a simple solution, while also creating a Czechoslovakia with a strong German minority to play, and indeed did, the role of a fifth colony, once the war had been ignited. +At the end of the 19th Century, the Rothschilds instigated the establishment of the Zionist movement, one branch of strove to form the Jewish State, seeking out an area of historic Jerusalem, to make its capital (the Return to Zion). +The aforementioned Balfour Declaration formed the basis for the mass immigration of Jews to Palestine, where the first conflicts began with the local Arab population. +Terrorist attacks occurred on both +World War II broke out, and whether Hitler broke free from the leash, which international bankers were holding him on, or whether his actions were all part of the plan, is difficult to determine, nevertheless the suffering of European Jews in the concentration camps created the foundation to the world's acceptance of the Jewish State. +Israel was officially in 1948, and just as war reparations for World II were layed on Germany, the announcement of the State of Israel the third war's hotbed. +Provided the international bankers succeed, the Jewish Nation, as with second, will be victims on the front line, now together with the Arabic - or more Muslim - population of the Middle East. +Israel is like a huge laboratory, a of discord and chaos not only within the country, but on an international level (just look at how strongly people are split into supporters and opponents of Israel). +is the wrong-doer and who is the victim in the Palestine-Israel conflict, where injustice breeds injustice in an endless cycle of violence, while began from the greed of a few and their lust for global power? +Here, we must differentiate between Israel's general population and their leaders, just as it happens here, the international introduce their own selection of candidates for people to vote for. +Israel's current prime minister, Netanyahu 'the hawk', is a typical example of a fascist politician, loyal to the international bankers, who does everything to instigate war with Iran, which due to its membership in Shanghai Cooperation Organisation (China, India, Russia, Pakistan, ...) lead to a greater threat of global conflict, and through its control of the Hormuz Strait, where 20% of the world's oil must sail (the channel is only 2 miles to the destruction of world's economy. +In what light stand the words, spoken by David Rockefeller in 1994: "All we need is a crisis and the nations accept the New World +The New World Order in their eyes is one of master slave. +A world where the of human population serve the luxury of a handful of financial aristocrats. +A world, where each new-born is implanted with a chip, which makes their completely subjugated. 
+"He forced everyone, and great, rich and poor, free and slave, to receive a mark on his right hand or on his forehead, so that no one could buy or sell unless he had the mark, which is the name of the beast or the number of his name. +If anyone insight, let him calculate the number of beast, +for it is man's number. His number is six hundred and sixty six." +Argo: When things are at their worst - call Hollywood. +November 1979, a mob of Islamic student demonstrators took over the American embassy in Tehran and held 52 diplomats hostage. +They were to be released in exchange for the overthrown Shah Mohammad Reza Pahlavi, who after the revolution to the USA, which had supported his regime for several decades. +For the American administration the situation did not offer a positive solution - it could not throw the Shah overboard, because this would seriously jeopardise the trust of allied countries. +The release of the hostages in Iran, where the revolution resulted in the establishment of the theocratic regime, could not be achieved. +This was a blow to the of the United which was later compounded by the fiasco attempting to the hostages by force. +The incarcerated diplomats were finally released after days, negotiations mediated by the Algerian government. +Their ordeal provoked a wave of solidarity and anti-Iranian feelings at home. +The debacle in Iran significantly influenced Jimmy Carter's loss with Ronald Reagan in the 1980 presidential elections. +The film Argo, directed by the actor Ben Affleck, recounts one episode in this story, which brought America small victory. +Just before the embassy was seized, six employees escaped. +After some peripeteia, they ended up Canadian ambassador's residence. +The collaboration with the Canadian authorities, succeeded in getting them out of Iran, helped by an cover story - they left Canadian passports as members of a film crew, who were surveying locations for a sci-fi blockbuster. +A of genres +The plan, conceived by "exfiltration" expert, Tony Mendez, required the assistance of Hollywood. +For the story to be believed, the film project was reported on in specialist magazines, press conferences were organised, and the fictitious production company had a real office. +The details of the operation were, for a long time, kept secret; the film draws on the memories of Tony Mendez. +Affleck's film is a peculiar mix of genres. +On hand, there is a realistic incisive political thriller, and at the same a "caper movie," with small victories and double-crossing - another example would be Ocean's +The mood alternates in the film - on one side, sharp documentary-style sequences in Tehran (the title sequence shows iconic photos from news the time, relating to the same events portrayed in the film - there are no big differences). +On the other hand, lighter sections from Hollywood, laced with irony and a little +Then there are scenes from the headquarters and other agencies - men in suits debating the situation around meeting in office corridors, over the phone... +Ben Affleck has managed to restart his career in extraordinary +The derided actor has become a respected director, and his acting is no longer the target of ironic comments. +Argo is his third big-screen movie, his dark crime movie Gone Baby Gone (2007) and the thriller The (2010). +It is also Affleck's first picture, which does not take place in the director's hometown Boston. 
+The atmospheric feel in different locations was one of the characteristics which took his earlier films above Hollywood standards. +Affleck shows it in Argo, where Tehran is "featured" by Canada. +The best scenes of the film take place in the streets, in the reconstruction of real events - the opening sequence of the siege on the embassy is impressively lucid, creating at once the feelings of confusion and surprise which come flooding in as history suddenly takes a turn. +A similar effect is achieved by Affleck and his team in the fictitious scenes (the fake film staff at the Tehran bazaar). +Too much action in too many places +The director had to deal with the issue that the story being told does not offer many nail-biting scenes for the film. +What little there is, is worked well, but the occasional embellishments to reality do not all come off so elegantly (the scene where a looming crisis is averted at Tehran airport by a phone call in America, followed by a chase on the runway, seems quite far-fetched). +Argo's weakness is its divergence, which comes from the need to show too many events in too many places. +Alan Arkin and John Goodman play their roles as the Hollywood assistants with great charm; their characters deserve more space and are by far not the only ones in this film. +Affleck's film loses the dramatic pull a little; it is a film which can be watched with reasonable interest, and its production and retro-style are evocative of thrillers from the 70's. +It does not really captivate. +As a reminder of history's particular ways and a testimony to how exaggerated the views are on the all-powerful, all-controlling secret services, this will do. +Rules for blowing up balloons, for bananas and a circus +The www.bankovnipoplatky.com server, which issues a poll every year on the most absurd bank charge, has now decided to announce a competition for "the most absurd regulation or proposal from the EU." +"We were prompted by the latest story, where the EU plans to take on a 40 percent quota of women at management level of Europe's largest companies," Patrik Nacher, the poll's organiser, told Pravo. +Among the latest nominated absurdities, for instance, is the recent decision by the European Court to unify insurance premiums for men and women. +Until now, women were favoured in life insurance because they constitute a lower risk for insurers. +"Unbelievable ideas from the EU can be nominated by anyone until the end of the year. +The actual voting will then take place until the end of February 2013," informed Nacher. +Among the controversial proposals we might include the mandatory addition of bio-ingredients to fuel, which consequently harms the environment, the ban on reliable mercury thermometers just because they contain a relatively small quantity of a toxic substance, or the rules on the size of chicken cages, which significantly raised egg prices this year. +The ban on the use of the term "spreadable butter" and the withdrawal of classic light bulbs from sale had previously come under criticism. +First-rate bananas are to measure 14 centimetres +The Union's bureaucratic machine often makes decisions under pressure from this or that commercial or lobbying group, whose demands in Brussels are usually defended by a state's or group of states' interests (just as the Czech Republic is promoting the demands of its banks under threat of being vetoed). +The lobby's interests were, for example, that bananas of the highest quality should measure at least 14 cm in the EU, and were not to display an "anomalous curvature."
+The European Commission defended itself, saying that it was only harmonising existing disjointed national standards, which complicated trading. +Norms relating to fruit and vegetables have already been softened by the EU despite opposition from certain states, referring to the food waste caused by the existing directives. +One possible prize-winner in the poll may be the year's EU regulation according to which inflatable balloons must be sold a warning that children under 8 years of age may not inflate them without parental supervision. +Here, the EU pointed to an American research, which indicated that, among other toys, balloons are one of main causes of child suffocation. +A similar restriction applies to children under 14 years of age using party blowers. +Strange ideas are at home too +Fairly absurd is the rule relating to individual European officials - everyone in the EU, who holds an official post, may not use the term Macedonia due to it being a sensitive for Greece, and instead the acronym FYROM (Former Yugoslav Republic of Macedonia) should be used. +The Bankovnipoplatky.com server in collaboration with the Liberal Economist Association, Laissez Faire, also nominated, aside from the aforementioned absurdities, for example the Union's regulation on the volume of food provision stocks held in an EU member state. +The EU stipulated the maximum volumes of food provisions, which may be present within the CR on the day of our entry to the Union. +The Czech Republic thereafter exceeded, for the permitted volume of mushroom preserves, which incurred a high penalty. +The poll's organisers were also impressed by the idea of paying certain countries they do not have a coastline, or the of allocating funding for a request for funding. +These did not come from Brussels, however, but from Prague. +"We are handicapped because we do not have the sea. +We are asking the European Union a refund," the minister for agriculture, back in 2004, Jaroslav Palas (CSSD). +His argument was that there had been a good harvest of cereals, due to the so-called buy-out interventions, the state's warehouses were full and were forced to export. +The Czech Republic is further away from a port, so according Palas the EU should be paying us hundreds of millions of Euros. +The European Commission finally met the CR halfway by organising a tender for the purchase of cereals countries that do not have access to the sea. +Funding to subsidise funding requests was offered to foreigners by the Ministry for Regional Development's minister, Pavel Nemec (US-DEU), specifically this was meant for making requests for funding from Brussels. +EU: Bizarre legislation is the exception +Regulations may well become the target of criticism among member states, but the EU's efforts at regulation, more effective operation, and development of the entire Union deserve recognition, according to a number of experts. +A important issue, according to experts, is the drawing of EU funds on projects, which have hardly in common with strengthening the European integration, but which was pushed through by member states during a budget +Emotions flare among Czechs when, just as other countries in the the CR must fight in Brussels for the right to particular labelling on its traditional products, in which it does not always succeed. 
+The Czechs fought for six years with the Germans and Austrians to protect the labelling of their Olomoucke tvaruzky, however the tuzemsky rum, whose tradition reaches back to the 19th century here, had to be renamed tuzemak by the manufacturers. +The appellation of rum can only be given to products cane sugar, and not sugar beet. +Carlsbad wafers, Pohorelicky and Trebonsky carp, and Zatec hops have been added to the official list of registered products of the alongside the world-renowned feta cheese and gorgonzola, German marzipan from Lubeck, and Parma ham. +The EU's stamp of protection can also be proudly shown on Pardubice gingerbread and Horicky tubes. +People me to save the republic, I am an amateur, says Okamura +Senator, how does a person decide they want to run for President? +This is not me being a senator or president. +If everything in our worked without problems, then I would not be running for any post. +I cannot watch any longer the country having been robbed over the past twenty years, thieves roaming about there and people's taxes and retirement age increasing. +I had no ambition to be a politician. +When I see something I do not like, though, I try to find a solution to change things. +Since I already turned forty, and I independent non-party man, I have no other to influence things but to stand senator or president. +You have already reached the Senate, but shortly after that you taking off for the Castle. +Are you not turning your back on those who voted for you in doing this? +have been saying the entire time I would for the Castle based on the results in the Senate's elections. +Later, I added that if I were elected as senator, I would standing for president. +My goal, though, is not the post, the post is a tool to allow my vision to be +I need the greatest influence, and the strongest mandate +The trouble is not just that as a nation we swear in the pub or at the television, but that we trample anyone, who wants to try to change things. +The Media add to this, misleading the public, and mistaking freedom of speech freedom to lie. +I was allegedly bribing reporters, or I allegedly an advisor of Jiri Paroubek. +Let's talk about your vision. +You set out on your castle siege with a thesis on the material and criminal responsibilities of politics, and a retroactive financial disclosure of assets over twenty million. +need to change the law for this. +As president, though, you do not have this power, and only the Senate a whole may propose laws. +How are you going to solve this? +When I lobbied, as a citizen, for tour guide work to be a free trade, it was successfully carried through. +The problem is political squabbling when someone comes with a good idea from the left or the right, it will be deliberately rejected, causing delays for the public. +an independent non-party man, I stand far better chance of gaining support from all parliamentary sides. +The advantage I hold is that without the political pigeonholing dogmas I can take what is best for our country from any side, and apply it. +Do you see yourself as person from right, or the left? +From the Czech viewpoint, it seems they tend to put to the left. +For me, it just does not matter if it is a to the left or right. +The important part for me is moving forward. +It is not about whether someone is from the left or right, I just want to bring people together. +I always support any good public solutions, even if they are put forward by the KSCM or the ODS, and, in the same way, I will oppose bad ideas. 
+get angry when someone calls you a populist. +Are you not confirming this with what you have stated? +When you make a company business plan, you also have some ideal and vision. +You try to come close to it. +Some may call it populism, but all the proposals I speak are already working elsewhere, or they have been put forward by experts. +But without the support of the Parliament you will be left with just slogans. +You will not last long in politics with that. +Or do you believe that if you walk among the public and them, that you will succeed, say, in passing criminal and material responsibility? +I have no alternative. +I need to convince politicians, reporters, and the public, and try to get them on my side, so we can put this through. +If I were elected president, it would not be a problem to arrange a live television broadcast, where I ask the leaders of the parliamentary parties to pass law on material and criminal responsibility for politicians, civil servants, judges, and the General. +And, as the case may be, they need to explain they did want this. +When there is a strong figure to point out the issues, it just needs some pressure on the political scene. +Take for instance the direct election of the it was achieved thanks to public pressure. +I will say frankly that am an amateur, I am not a genius or an intellectual. +I am looking for allies to share my opinions and vision. +I have started out in and I am looking for a majority for my agenda. +I will try to make things progress, but it does not work out, in six years will finish and return the private sector. +It sounds a little like Okamura is trying to save the Czech Republic. +I am no saviour. +I know that I will not achieve anything, so I have asked acquaintances, whether they would run for the senate. +I went to Radim Jancura, who declined due to his workload. +So I, at least, support investigative journalist, Jana Lorencova, who uncovered fraudulent activity with light heating oil. +I put myself forward, because people are really discontented, but now I have my doubts. +Sixty percent of people did not go to vote, and those who did mostly voted for leaders of the establishment. +In the senate, there are only two independents, including me. +People have voted for a senate that will make it difficult to enforce changes. +Nonetheless, I will fight for my vision, for example, for election of mayors or regional council presidents. +Are you considering having your own party? +I have not considered it yet, because I have neither the time to verify every party member has a clean background, nor the to do it. +I have no money even for a presidential campaign, my transparent account holds just 20 thousand. +You have no money? +You are talking about financial disclosures, but what is yours like? +estimate my private assets to be around 60 million. +In Prague, I have land worth around 25 million, an apartment worth ten million, another apartment worth eight million, an artwork collection worth around ten million, an Aston Martin worth 3.5 million, a Skoda Superb worth a million, and I have a few million in my account. +I have the Aston Martin, by the way, because my dream as a boy - I liked James Bond, who drove the car, gallant women and also fought against evil and villainy. +You an Aston have assets worth million, but you have no money for campaign? +You say you want to change the Republic, you are not keen on putting your own money into it. +This does not inspire much confidence. +I do not have 15 million for a campaign. 
+Should I take out a loan? +I have already put 2.5 million into the campaign. +The fact that I do not have any sponsors is evidence that there is no real interest in my programme. +I have no obligation to pay for my own campaign. +The expenditure on my campaign is basically covered by the pay I will be receiving as a senator. +However, I would not be able to live it, for instance, I not pay for my son's English school, which costs 30 thousand a month. +If I were only interested in making money, I would not be standing for election. +So you will still be in business so that you can a living? +Did you not say you would be putting this on hold? +This depends on the rate of pay. +As I promised, my activities have been partially reduced. +For example, my deputy is taking over as the CEO of the travel agency in spring. +People would like me to be a Samaritan, who the Republic. +But I must also live off something. +As a businessman, what would you usually make monthly? +Two hundred to 400 thousand, which I still do. +And if I became then I would end my business activity. +The full interview can be read in Saturday's issue of Pravo. +The MVRDV architects prove that true adventures are not just in the head - drawing on the example of Spijkenisse and the recently Bücherberg (literally "book mountain") - 2 photos +"I think building is fun, looks futuristic and provides something interesting to look at," said Lisette Verhaig, a passer-by at the road-side. +And Spermon, IT technician in a major firm based commented: "It's definitely a thing of beauty, the building." +However, I do wonder why people would need another library in this day and age. +Everyone the Internet, an iPad and eBooks. +No-one goes into one of these old-style libraries voluntarily nowadays, or am I wrong? +Spijkenisse, a sleepy town outside the gates of Rotterdam, which merits a visit, is a special record-holder. +The municipality has the lowest literacy rate in the whole of the Netherlands. +In order to counteract this asinine situation, the decision was made a number of years ago to make a contribution towards general education and to recreate the seven fictitious bridges that feature on the Euro notes as pretty, reinforced concrete miniatures. +The success of the education offensive was limited. +And so the city fathers acknowledged that there was only one way to become master over the statistics: a library had to be built! +Winy Maas of the Rotterdam-based architectural firm MVRDV, of audacious bar charts and producer of humorous and often cynical buildings, took the project on with his customary composure, and turned up at the competitive hearing in 2003 with books under his arm and a on his face. +And with the judging panel still looking at him with bewilderment, shrugging shoulders, the impertinent Maas stacked his chosen props by order of size to form pyramid and rounded off his presentation - now suitably backed up with action - with the words: "Dear Municipality!" +So this is my suggestion for the Spijkenisse Mountain - for the so-called Boekenberg! +Nine years later, the 30-million-euro mountain has been lifted up. +It is part of a revitalisation project, which also includes an underground car park, a supermarket, a post office and small number of adjacent apartment buildings and terraced houses, a total of 50 dwellings. +At the beginning of November, the Bücherberg was awarded second place in "Best Library of NL 2012" competition. +In addition, the project is also nominated for the Dutch National Wood Award 2012. 
+Thus, the faceless small-town retort, that until now had nothing more to offer than a post-modern pedestrian area and a stunningly ugly town hall, behind whose white facades one would expect to find a dairy plant, been bolstered by a piece of contemporary architecture. +First and foremost, however, Spijkenisse now has its first public cultural building in the history of its existence. +The long journey to the book +The first impression: the Eldorado of books beneath a cheese dome. +There is in fact a lift that climbs through the centre of the mountain massif, however, the true joys of space literature are when scaling the topography on foot. +The interior space, glazed throughout, is bright and the fired clinker floors and the elegant street lamps speak the unmistakable language of a public town square. +The urban ambiance is perfect. +You already on the lookout for a park bench, a dog, and boys and girls playing +And everywhere there are books, books, books. +"Normally book shelves run along the facade, and in the centre there is a large, dark space, which is usually unpleasant and impersonal," says Winy Maas. +We turned the classical spatial configuration on its head and turned the reading area inside out. +The interior of the Bücherberg is cleverly used: in the centre there are offices, an Internet library, a chess club, an environmental centre and the technical support room. +One particularly special feature are the black book shelves, which simultaneously act as cladding, parapets and railings for the stairway. +The appearance, feel and scent are foreign. +Even die-hard architects and construction engineers shake their heads at the unknown construction materials. +"Here we wanted to with recyclable materials," Joop Trouborst, Project Manager for Municipality of Spijkenisse, on request of +And thus one day we stumbled across a suitable waste product in agriculture, on a Frisian +many years, millimetre-thick artificial fabric has used in greenhouses and in the Netherlands as a base layer. +It is inexpensive saves time. +The thin textile lasts for two seasons and then disposed of as bulk waste. +For the library, the fabric was - for the first time in these quantities - pressed four-centimetre-thick boards. +Under heat and pressure, the Landbouw plastic (KLP) changes colour to a dark, homogeneous and material, that smells like mixture of new car smell and the smell of trainers. +105 steps you have reached the summit. +At the end of the 500-meter-long journey, you are rewarded in the Literature Café, not only with a fantastic view of the city, but also with Dutch croquettes and potted trees. +These provide atmosphere, but most regulate the air humidity in the literary mountain range. +Donations for the new soul +"You would hardly believe it, but this building, in spite of the many glass panels, is a showcase project in the area of ecology," said Trouborst. +It is heated and cooled using geothermal heat. +Although Bücherberg has a glass cover, the sun only only briefly into the interior, even on sunny days. +The broad, laminated wood glue beams positioned at right-angles to the glass facade, provide and absorb the majority of the sunlight. +The indoor temperature is pleasant. +The rest is taken care by fully automatic blinds. +Stefan Spermon, initially a sceptic of the IT sector, has already ventured into the new library. +Lisette Verhaig has also visited already. 
+So too has TCM-teacher, Cynthia Bogarde, even refers to the as Spijkenisse's "long overdue +The reason: At the inauguration just a few weeks ago, every citizen was invited to a book his/her personal collection. +This was, for the time being, to fill the optical gaps in the not yet fully stocked library - currently there are 70,000 items. +The concept has been a success. +The shelves are to capacity. +"Nothing is worse than a half-empty library," said architect Winy Maas. +"I think that, thanks to our invitation, every resident now has a certain bond with this new building. +Everyone knows that their book is part of the building. +Even if it's just for decoration. +As such, MVRDV have in mastering the master discipline that specialist jargon refers to as the formation of identity. +Spijkenisse has written literary history. +However young uneducated it may be. +This is ultimately a starting point for identity. +Szabo: "Germans play a role" +In the vote on the incorporation of Palestine, abstained voting. +According to Stephen Szabo, in US-European relations, in so doing Berlin is walking a thin diplomatic line. +Deutsche Welle: At the beginning of the week, Germany had initially signalled that it would vote against the Palestinians' application observer status within the United Nations. +However, Berlin subsequently abstained from voting. +Why? +Stephen Szabo: Germany does not support what the Israelis have done Gaza. +Now, however, due to their special relationship with Israel, Germany must be cautious. +At the same time, however, I do not believe that it supports the American position either. +Germany wanted to demonstrate its independence - albeit without being too critical of Israel. +During the uprising in Libya in March 2011, Germany likewise abstained from voting, when it came to establishing a no-fly zone. +This was ultimately implemented by NATO. +Does Germany find it difficult to adopt a clear position when it comes to international affairs? +Yes, it does. +That is because it has just reorganised its foreign indeed moving from a policy that was, so to speak, managed by the USA, in favour of a German policy. +This situation is aggravated by the fact that the Europeans do not have a coherent and standardised policy. +The Germans thus find themselves caught between two fronts. +It is expected of them that they play a more independent role, yet this is something they are not accustomed to. +I believe that they still finding their way in this role, but they are en to a "more normal" foreign policy. +A foreign policy similar to that of France, or Great Britain. +So what does a foreign policy entail, from a German perspective? +It shows a willingness to adopt positions on international matters, which are independent of those of the USA or European partners. +I believe German foreign policy is by the economic policy, that is, by export and relations with certain regions such as Russia, China or the Near East. +Germany's economic interests are a certain extent different from those of the other major powers and therefore Germany must protect its interests. +Have these economic interests had an influence on their attitude towards the Near East conflict and their voting in the UN? +the one hand, Germany has major revenue markets in the Near East, and particularly in the Gulf States. +Therefore it must be careful not to affront the public, but the elite in the Arabic countries. +In any case, this plays a role. +However, I wouldn't want to ascribe too much weight to this. 
This is not an entirely one-sided relationship. +Nonetheless, it does play an important role in Germany's considerations. +Has Germany damaged its relations with the USA, by abstaining to vote on important decisions, such as vote on Palestine? +I think that in Europe, and even in the USA, a great understanding for the German position prevails. +Therefore not think that this was as dramatic a fracture as was the case in the matters Libya. +Perhaps it will even earn Germany a certain degree of respect. +After all, it signals that the country must be taken seriously as an international player and that interests must be considered. +In Europe there are diverse opinions regarding Palestinian +The USA, on the other hand, have spoken out in favour of a veto. +Are there differences of opinion between the USA and the many European nations? +Due to the American domestic policy, these differences have always existed. +I think that secretly, the government under Obama actually has a great deal of understanding for the European situation. +However, due to the political situation here, the government is naturally unable to voice position publicly. +It is my belief that the actual differences in opinion are not so vast as they always appear. +If you look at the relations between Obama and Prime Minister Netanjahu, Obama is really not quite so enthused by Netanjahu's policies. +Does Germany find it difficult to reconcile its close with Israel and the the one hand, and the position of its most important partners in the EU on the +I think that this is precisely what makes things so for the Germans. +It of course be a simpler for the Germans there were coherent and standardised European policy, which is currently not the case. +Thus they are unable to be part of a wider and must instead drive matters from their own position. +This precisely what they are doing with the Euro. +I believe that in the future will take on a leading role in urging Europe towards a standardised European position. +is, of course, no simple task for Germany, on account of its relations with Israel. +This has always been a sensitive subject. +Yet I do think that Germans are clear that they must play a more independent role. +Does Germany view itself as the role of an important player - does Germany actually want to assume a leading role? +Or does Germany still find leadership difficult? +Germany is still not used to it, the continues to be uncomfortable and, for obvious reasons, still finds it difficult to play a more prominent role. +we look at Euro crisis for example, every time that Germany assumes a more prominent role, various anti-German feelings become apparent. +This does make matters simple for Germans. +This is actually the same old problem: one does not want to be surrounded by hostile countries. +From this stance, Germany is in a much more difficult the USA. +It must be receptive to the most diverse of neighbours and opinions, and this is not easy. +The influence of the USA over European politics is continually diminishing, yet the EU is currently not feeling this vacuum, so who is filling the gap? +The Germans will simply have to play a greater role. +Even if they do not like it, even if is uncomfortable and makes them even unpopular - c'est la vie! +Stephen Szabo is associate director of the Transatlantic in Washington, an institute in which academics and political experts from Europe and North America come together to research the challenges of the transatlantic community. 
+Szabo is also a member of the German Marshall Fund, in which he has specialised in policy, US foreign policy and transatlantic relations. +"Brand protection" in China: When Puma and Armani suddenly become Chinese +Armani is a world-famous brand, Polo Ralph Lauren likewise. +However, what is Armani Polo? +Behind this name hides a fully officially registered brand in China, however, one that has nothing whatsoever to do with the original companies. +Nonetheless, it is enjoying protection, provided the actual creators of the names do not sue. +And even then it is not whether they will have any rights. +"It becoming increasingly more difficult for foreigners to protect their brands in China," said Thomas Pattloch, lawyer within the Taylor Wessing firm, who specialises in copyright infringement in the Far East. +Every week a new case lands on my desk. +All the copycats require are a few additional letters in order that they can register their brands. +Thus Gucci simply becomes Lu-Gucci, Prada-Kny is registered in of Prada. +German companies are also 'legally' copied this manner, such as manufacturer of sporting apparel, Puma. +Pattloch opens a file containing registrations with the trademark office in Peking. +On 14 September 2010 a Chinese company copyrighted the brand name Zegna Puma there, an alias that also helps itself to the name of fashion retailer Ermenegildo Zegna. +The fact that the Chinese are world champions in copying and infringing on intellectual property is well-known. +In the major cities there are multi-level department stores that sell counterfeit goods almost exclusively. +Pattloch's however, are slightly different: on behalf of his clients he takes action against the fact that companies can be granted the right to use a name by the trademark office, officially, is already protected +The Chinese call this Mingpai," a passenger brand. +The word is based on "Bang Dakuan." +This refers to women who latch onto rich men. +The Chinese authorities are unaware of any wrongdoing. +"This harms business and we must fight against it," challenges Pattloch. +"The is watered down, its uniqueness disappears - the image damage is enormous." +The financial losses and costs of the affected branches amount into the millions, in the case of expensive flagship products. +According to information from market research company CLSA, with a volume of 15 billion euros annually, China is the third largest market for luxury items, and the fastest growing. +However, the deletion of dubious entries in the trademark registry are difficult to achieve, and cost a pretty penny. +The process can last for up to nine years, with an uncertain outcome. +Pattloch reports of instances whereby court dismisses cases, because after a long period of time, the to objection is being raised has become a "market reality." +If the complainant unlucky, he may even have to pay the plagiarist money for having infringed on his trademark in China, said Pattloch. +Sometimes the law of the jungle prevails here. +Famous cases also relate to graphic elements. +In 2009, Daimler lost a legal battle with the construction machinery manufacturer Sany, the company that recently acquired German concrete pump manufacturer Putzmeister. +Even today, Chinese company is therefore permitted use an emblem that resembles the Mercedes star. +Volvo-purchaser originally used a blue and white logo that resembled the BMW logo; the dispute was arbitrated and Geely was forced to change it. 
+Fashion house Lacoste lost a suit in China against copycats from Hong Kong and Singapore, who were using the famous crocodile looking in other direction. +The Chinese authorities are unaware of any wrongdoing. +The CTMO trademark office in does acknowledge that there were bottlenecks in 2010 due to limited staffing and equipment. +In the past year, however, things reportedly "returned to normal following this emergency situation regarding the work flow." +Thus the stock of appeal proceedings was reduced by 22 percent. +Almost 57,000 such cased were closed, 75 percent more than in the previous year. +Nonetheless, there are still 81,500 appeals waiting to be resolved in the office. +To remedy this is expensive +As is often the case in China, the figures are imposing. +In the past year, more than 1.4 million applications for protection were submitted to the CTMO, almost one third more than in 2010. +This is a record means that China, for the tenth time in succession, is the global leader when it comes to new trademark applications, informed the authority. +The same applies for the inventory of valid trademarks, totalling 5.5 million in number. +In 2011, 1.8 billion yuan in fees were received. +Put simply, this means that each application costs on average 1,280 yuan, or 160 euros. +To appeal against an application costs many times this amount, as can seen in the case of the German family business, Freudenberg. +For more than seven years, the group has been contesting against a Chinese plagiarist. +The Germans did in fact manage to expose the company's illegal manufacturing copied motor vehicle parts. +the copycat still secured the Chinese rights to the Freudenberg brand. +This is something we missed ourselves, as family names cannot be protected in Germany, said Hanno Wentzler, Chairman of the Board of Management at Freudenberg Chemical Specialities in Munich. +The CTMO trademark office then also dismissed the Munich-based company's appeal. +In the next two instances, Freudenberg was proven right, however the opposing party continues to contest the matter to this day. +You have to pay extremely careful attention. +The matter is now pending before the Supreme Court. +Wentzler is confident that the matter will be brought to a positive conclusion and praises professionalism of the courts. +However, he also says: "The process is extremely expensive and takes a lot of time, money and nerves." +The internal costs can barely be calculated, the company archive even had to look through century-old records order to provide proof. +Five years ago Freudenberg unsuccessfully offered the opposing party a "high six-figure sum in euros" as settlement. +"This shows how much this is worth to us," says Wentzler. +The dangers the Far East even threaten to spilling over, back into Europe. +Particularly if imitators secure unprotected brand names there. +For example, Chinese manufacturer wanted to register the Freudenberg label for shoes and leather in Germany. +This is a business sector that the group had long vacated, yet nonetheless managed to prevent the registration. +"You have pay extremely careful attention," says Wentzler. +Both he and Pattloch advise companies to be very careful when conducting business with China. +It is not sufficient to rely on trademark rights, rather foreigners should also register "everything" that is in any way worthy protection in China as well," said Wentzler. +Otherwise costs can be much more expensive than the registration fee. 
+In actual fact: if Freudenberg were to loose at the final hurdle of its trademark drama, they would probably have to pay the opposing party license fees for the use of own name, explained Wentzler. +Or alternatively we would be forced out the market in the sector. +World AIDS day: Stomp, sing, help +In Heidelberg, the Imbongi choir is rehearsing - and in Swaziland, AIDS orphans are delighted. +The history of a link that overcomes far more than a distance of 8,733 kilometres. +First all, the stamping: cowboy boots, basketball shoes, ladies' pumps and men's attempt to find the beat on the parquet floor, and quickly do just that. +One-two-three-four. +Only then the voices of the singers slowly swell - alto, bass, tenor and soprano surge, beguile and haunt. +And Fiete Hopf, the 29-year-old conductor, almost rises up out of his shoes as he brings the ensemble together with his smooth, yet wild gestures. +It is evening and in the music room of the Institute for Medical Psychology in Heidelberg the Choir are practising a new song. +The fifteen singers, aging from 23 to 69 years old, range human geneticists to the maintenance man. +"Om'Obani" is by no means a simple piece, with each voice having a different text, and in an extremely foreign language at that: Zulu, which is spoken by eleven million people in South Africa, Botswana, Malawi, Mozambique and in parts of Swaziland. +Helping others help themselves +There are around 34 million infected with HIV around the world, as according the estimations of Unaids, the United Nations' programme to battle AIDS. +Of these, 23.5 million live in South +In Swaziland, there are 245,000 AIDS +Meanwhile, more 40 percent of the population are HIV positive. +The Voices for Africa Association has found sponsors in Germany for 180 AIDS orphans in the village Esitjeni. +70 of these attend a secondary school. +For 15 or 20 euros per month, you can become a sponsor. +This guarantees the child money for school, a school uniform and warm meal each day in the Gogo Centre. +In Zulu, Imbongi means storyteller or worshipper. +In this region, no-one can speak the Bantu language fluently, but they can sing it. +For almost ten years the choir has practising in this foreign, 'soft' language, and now and then they bring back to where they originally came the South of Africa. +For an 8,733-kilometre flight away from Heidelberg, in the north west of the Swaziland Kingdom, lies the village of Esitjeni, which relies on the vocal power of the German +Forty percent are infected. +Around 2,000 people live there, still in simple mud and straw huts, and the majority of them are +More than 300 of them no longer have parents, as they succumbed to the HIV virus. +In Esitjeni you get a small foreshadow the illness from which all of Swaziland is suffering: according to Unicef, the region the highest HIV infection rates and the lowest life expectancy in the world. +Circumcision, which has been proven reduce the risk of contracting the by half, is barely practised by the population. +More than forty percent of people in the Swaziland carry the immunodeficiency virus, and dying in you mid-thirties is by means rare. +On a group trip to Africa in early 2005, the Choir visited the village, but first and foremost, the many children on the streets, lacking not only in parental care but in practically everything else as well: food, clothing, education. +a school leaving there barely any opportunities, particularly in a poor country. 
+Initially it was the private commitment of individuals to send a child to school and enable him/her to have one warm meal a day for a few euros per year. +However, just one year later, the choir established the "Voices for Africa" Association, which since then has been looking after the AIDS orphans in Esitjeni at an almost professional level. +Facts on sexually transmitted infections. +What are the most important sexually transmitted diseases? +Bacterial STIs include syphilis, chlamydia and gonorrhoea. +Common viral STIs are HIV, human papilloma viruses, herpes genitalis and hepatitis. +Crabs and scabies belong among the parasitic STIs. +Who are the main affected groups? +Syphilis and gonorrhoea occur primarily in men who have intercourse with other men. +The Robert Koch Institute understands that at least four in five of all syphilis cases reported in Germany are transmitted by means of sexual contact between men. +Among heterosexual adults, chlamydia infections, trichomoniasis, candidiasis (fungal), gonorrhoea and human papilloma viruses are frequently occurring sexually transmitted diseases. +The spread of HIV among heterosexual adults in this country is relatively low; however, 20 percent of newly contracted cases of HIV are found in this group. +Among young people, chlamydia infections are much more common than in other population groups. +According to European surveys, three quarters of all infections affect young people between the ages of 15 and 25. +In this country, human papilloma viruses are also frequently found in young people. +How has the number of infections developed? +Not all sexually transmitted diseases are notifiable. +According to the Robert Koch Institute, the number of syphilis infections has more than doubled from 1,697 cases in 2001 to 3,698 cases in 2011. +The number of newly contracted cases of HIV has been on the decline since 2007. +In 2011, there were around 2,700 cases. +This is around one tenth fewer than the previous year. +Which symptoms indicate a sexually transmitted disease? +The infectious diseases can cause ulcers in the genital area, discomfort when urinating, discharge, lower abdominal pain and blisters or warts. +However, often they cause no pain or any other symptoms, thus remaining undetected. +How can you protect yourself? +Condoms can reduce the risk of contraction, however, they do not offer 100% protection. +This is because occasionally, the pathogens of sexually transmitted diseases can also be passed on via smear infections and close bodily contact. +Therefore, first and foremost experts recommend that people with frequently changing sexual partners undergo regular examinations. +If diagnosed early, the majority of STIs can be treated and long-term consequences avoided. +Through sponsorships and donations, and not least through the funds that the choir raises across the whole of Germany, the money all adds up. +"In total, we have already sent around 200,000 euros to Esitjeni," said Annette Lennartz, Chairperson of the association. +In the village itself, Zodwa Dlamini, a self-assured and assertive woman, manages the money from Germany. +She makes sure that the orphans have good accommodation, for example with one of their grandmothers. +The Gogos, as the old ladies are called in Zulu, are the pillars of the village. +Some of them have up to 14 orphans living with them, providing them with a roof over their heads and making sure that the children get to their school classes punctually every day, in their school uniforms.
+Anyone who doesn't have anyone left arrives at the shelter with Khanyisile, a single woman who earns the same salary from the association as the two cooks who cook for more than 200 hungry children every day. +In addition, "Voices for Africa" has established a sewing school, built two chicken coops and, together with the American health organisation, PSI, organised for many in the village to be tested for HIV. +This is not to be taken for granted, as is clear from the attitude towards illness throughout the entire country; the best way of keeping things under wraps is if people are dead. +A king with 14 wives +"AIDS is an absolute taboo subject," said Annette Lennartz, "because it is associated with sexuality." +This is actually strange for a country in which the king has 14 wives. +The last absolute monarch of sub-Saharan Africa, King Mswati III, is known for his excessive lifestyle. +Polygamy in place of democracy. +Among other factors, the fact that the HIV virus has spread quickly over the past number of decades can also be attributed to this officially sanctioned lifestyle. +Another factor is the large number of migrant workers who carry the virus across the country. +"Condoms are available free of charge on every corner," said Lennartz, "but they are hardly used. +The culture prescribes otherwise - flesh to flesh." +In order to promote the cultural exchange, the Imbongi choir travels through Southern Africa every two or three years and sings songs of fighting spirit, confidence and black self-esteem, which many from the southern tip of the black continent still know from the times of apartheid. +A bus full of white people, who sing songs in a black language - this degree of recognition brings not only morale and joy, but some grim-faced border soldiers even shed a few tears. +The journey always leads to Esitjeni, where the singers visit their sponsor children. +Even though you can barely find the small village on a map, it is more than well-known in the valley of the Ezulweni River. +"Go to Esitjeni, that's where the light is," say the people there. +And if you make the 8,733-kilometre flight back to Heidelberg and visit the stomping singers in their rehearsal room, you'll see that the light is there too. +Messenger: probe discovers ice on Mercury +The Messenger probe has found evidence of ice on the planet Mercury. +It is thought that the ice cover may be up to 20 metres thick. +The US space agency, NASA, has proven the existence of ice on the planet Mercury. +Although the planet lies closest to the sun, it does have frozen water - as shown in three studies published on Thursday in the specialist magazine "Science." +The Messenger probe has found evidence that there is an ice cover in the region of the planet that lies permanently in shadow. +This is thought to be at least 30 centimetres and perhaps up to 20 metres thick. +The water presumably came from comets or perhaps also asteroids that impacted with Mercury. +However, no-one is linking the discovery of ice with the existence of life on the planet, said the chief scientist for the Messenger probe, Sean Solomon. +The temperature on Mercury can reach up to 426 degrees Celsius. +That said, the findings could help explain how water and other building blocks of life reached other regions of the solar system. +Unknown to the majority of the Earth's inhabitants, there are probes, telescopes and small robots such as the Phoenix, deployed to research the depths of the universe. +From time to time, they transmit images to Earth: small peepholes into the infinite expanse. +This image comes from a camera developed by German researchers at the Max Planck Institute.
+The eight of our solar system, plus the dwarf planet Ceres. +Like Pluto, which orbits around the sun behind Neptune, is not planet according to the new definition of the term issued by the International Astronomical Union in 2006. +This image section from infrared recording by the Spitzer telescope shows a "family portrait" of countless generations of stars: the oldest stars are seen as blue dots, while more difficult to identify are the pink-coloured "new-borns" in the star delivery room. +This region rather unromantically named W5 by scientists - was discovered by the Spitzer telescope in the Cassiopeia constellation, at a distance of 6,500 light years away. +This shimmering glow of a dying was captured by NASA's Spitzer telescope. +The donut-shaped ring consists of material, ejected by the star in the process of dying. +In the huge Trifid Nebula, 5,400 light years away from the Earth, new stars are created from gas and dust. +NASA's Spitzer telescope shot this photo of the galactic delivery room. +The Pleiades star cluster, also referred to as "The Seven Sisters," be seen with the bare eye at night. +With the telescope, the colours really come into their own. +In this infrared photo, the Helix Nebula looks back at the observer like red +It is located light years away in the constellation. +Its similarity with the continent resulted in Nebula acquiring the title 'North America'. +A combination normal and photography produced the spectacular colouring. +baby star could only be captured in its full beauty using the Spitzer telescope's infrared detectors. +Saturn and its rings: How these occurred is the puzzle the field of +Perhaps they are the remnants of a moon Saturn, which disappeared without a trace 4.5 billion years ago. +One the largest and sharpest pictures the Hubble telescope: the Whirlpool Galaxy +Depending on the colouring, photographs of spiral galaxies can become genuine works of art. +The photograph published by the Southern shows the Trifid Nebula in Sagittarius constellation, several thousand light years away. +The name Trifid stems from the Latin word trifidus (divided into three parts), as dark stripes of dust divide the core of the birthplace of stars into three parts. +In the Ophiuchus constellation, astronomers have photographed the signs of a cosmic collision: 400 million light years from the earth, the cores of two merging galaxies move rapidly towards one another, destined to +This star birth was captured by the Hubble telescope in the M83 spiral galaxy. +Anyone doesn't like technical abbreviations may prefer to call it by its nickname, the Southern Catherine Wheel. +The photo taken by the Hubble space telescope shows section of the Iris Nebula in the Cepheus constellation. +The nebula, 1,400 light years away, consists particles of dust that are ten to one hundred times smaller than standard house dust. +This image was put together from the X-ray images captured various telescopes. +It shows a of black holes, 430 million light years away from the +This group of galaxies, named Arp 273, was pictured for NASA by the Hubble space telescope. +Scientists call the larger spiral galaxy UGC 1810. +This star nebula is home to the brightest group of young stars in our Milky Way. +This 'star cradle' continually produces new youngsters. +Likewise, this star cloud, connected to the Rosette Nebula, continually produces new stars - 5000 light years away from the Earth. +In this bright shining galaxy with one small black hole, there exists no dust - only gas. 
+Researchers presume that it only came into being shortly after the Big Bang, when the universe was comprised primarily of hydrogen. +Our view of the universe: the most important telescopes +The telescope is thought to have been invented in 1608 by Hans Lipperhey - even before Galileo Galilei used the device to observe the stars one year later. +Since then, the mirrors in optical telescopes have become increasingly large and the insights that they provide increasingly profound. +For a period of 30 years, namely from 1947 until 1975, the Hale telescope in the Palomar Observatory near San Diego was the largest telescope in the world. +The mirror shown had a diameter of five metres. +Arizona, USA, is home to the Large Binocular Telescope. +It enables views of space via two mirrors, each with a diameter of 8.4 metres. +The inner workings of the Gran Telescopio Canarias on the Canarian island of La Palma are huge - the mirror alone has a diameter of 10.4 metres. +The mirror of the Southern African Large Telescope in South Africa is segmented - to reduce costs. +In spite of this it achieves a diameter of around eleven metres. +The disadvantage of this inexpensive construction method: the telescope is fixed securely at its angle of inclination and its range of vision is therefore limited. +The Hobby-Eberly telescope in Texas also has a fixed angle of inclination. +What sets it apart is its light-gathering capacity. +This - in spite of its comparatively low mirror diameter - even matches that of the world's largest reflector telescopes. +With the help of a radio telescope in Arecibo (Puerto Rico) researchers can listen for extraterrestrial signals in space. +The radio telescope has a diameter of 305 metres. +In the "Search for Extraterrestrial Intelligence" (SETI) every computer owner can be of assistance, by making his/her processing power available. +View of the European Southern Observatory (ESO) in the Chilean Andes. +This is home to the Very Large Telescope, which lives up to its name. +With its total of four mirrors, the telescope can also focus on the mid-infrared spectrum. +Likewise to be located at the ESO Observatory in Chile, the European Extremely Large Telescope is also being planned. +Its main mirror is to span a full 42 metres and will be made from almost 1,000 mirror elements. +However, images are not to be expected until 2018 at the earliest. +Until 2007, the two Keck telescopes at the Hawaiian volcano, Mauna Kea, were the largest in the world. +They each have a mirror with a diameter of ten metres. +The Keck Telescopes are part of the Mauna Kea Observatory, which, alongside the Keck telescopes, can look to the heavens with the help of the Subaru telescope and the IRTTF. +Another huge new telescope is also to be built on the Mauna Kea, with a mirror diameter of thirty metres. +Here you can marvel at an artist's impression. +However, the most important insights into space are provided by the Hubble space telescope. +Since 24 April 1990 it has been supplying images of distant worlds. +Since 2009 the Kepler space telescope has been searching for extra-solar planets, especially for any that may be inhabitable. +On 2 February 2011 it was announced by NASA that 1,235 planetary candidates had been identified since the mission began. +The image documents the final launch preparations on the Kepler space telescope. +The James Webb Space Telescope (JWST) will travel into space on board an Ariane 5 rocket by 2018 at the earliest. +The primary mirror of the infrared space telescope has a diameter of 6.5 metres. +One of the telescope's tasks is to search for light from the first stars and galaxies that emerged after the Big Bang.
+Scientists are assuming that ice also exists at Mercury's south pole. +However, there is no reliable data in support of this, as the Messenger probe orbits around the planet much closer to the north pole. +For decades, radar measurements have indicated that there is ice on Mercury. +Thanks to the Messenger probe that was launched in 2004, the first to orbit Mercury, scientists can now be certain. +Drink butter on a daily basis - and live to 168 years of age +In Southern Azerbaijan, many people reach almost biblical ages. +There is even a museum of longevity. +A hunt for evidence in the country in which 97 years old is still comparatively young. +In Southern Azerbaijan, many reach ages that can almost be considered biblical. +There is even a museum of longevity. +A hunt for evidence in the country in which 97 years old is still comparatively young. +The journey through the Talysh Mountains can be described as wild and romantic. +The minibus rumbles over the winding streets, past densely wooded hills, raging rivers and simple farmhouses. +Everywhere is green and lush - you could be forgiven for thinking you were in the Black Forest. +However, this is the deep south of Azerbaijan, and the border with Iran is just a few kilometres away. +This is the home of the Caucasian people group, the Talysh, of whom not much is known except that they speak perfect Persian and Azeri and live long lives. +The final stop is Lerik. +The small town is bursting with overpowering architecture from Soviet times, which doesn't fit with the picturesque mountain landscape at all. +Tourists from Europe rarely come here; the journey from Azerbaijan's capital city, Baku, is too arduous. +It takes eight hours to travel the 323 kilometres, as much of the route is just a single track. +The fabulous wealth, for which the country has its oil in the Caspian Sea to thank, has not yet arrived in the province. +Yet Pilata Fatulayeva (48) is convinced that Lerik has what it takes to be a tourist attraction. +"Baku became famous in May due to the Eurovision Song Contest, and next year we are having a festival to celebrate the oldest people in the world," said Fatulayeva. +She is the Director of the Museum of Longevity, most likely the only one of its kind in the world. +Here the lives of dozens of Talysh from the region who lived to older than 100 are documented. Fatulayeva pulls out a black & white photo. +This here is my grandfather; he was 120 years old. +At the age of 136 he fathered another child. +However, the unrivalled star of the museum is the shepherd Shirali Muslimov, who is said to have lived to 168 years old. +However, no birth certificate exists to confirm this. +And given that the longest confirmed lifespan was 122 years of age, the claim seems extremely doubtful. +"He was born in 1805, here in the region, and died in 1973," says Fatulayeva. +The man married three times and had 23 children, and is said to have fathered another daughter at the age of 136. +So did Shirali Muslimov miscalculate his age by a couple of decades? +But Rembrandt Scholz, a researcher on ageing at the Max Planck Institute in Rostock, has also heard of people living to impressive ages in Central Asia. +"A strikingly high number of extremely elderly people can also be found in some areas of China, in Japan or the Hunza Valley in Pakistan," said Scholz, "while there is also an extremely large number of very old men in Sardinia." +Due to the lack of documentation, however, there is no scientific proof of age, particularly as there are no birth registers.
+Melted butter by the glass, every day +However, the fact remains that the people of the region surrounding Lerik reach a biblical age with striking regularity. +There are currently 20 individuals older than 100 years of age. +So why do so many very old people live here in the south? +The Azeri travel guide Farid Mugimzadeh explains this as being due to the special Talysh genetics. +In contrast, Museum Director Fatulayeva believes that it is due to diet. +However the notion that the calorie-rich diet of the Talysh, who love meat, bread and especially dairy products, and of whom many drink a glass of melted butter on a daily basis, could be considered healthy from a nutrition science perspective does not really seem plausible either. +Or is it the traditional way of life that keeps the people young? In Cengemiran, a settlement not far from the town of Lerik, lives Rubaba Mirzayeva. +At 97 years old she is still comparatively young for the area. +Mirzayeva, who claims to have 143 descendants, lives in a simple wooden house, which is typical of the Caucasus region. +She sits on the floor with a butter churn, which she rolls backwards and forwards tirelessly. +Eight people live here under this roof, including one of Mirzayeva's sons and a daughter, both of whom have been grandparents for some time. +There are also two small children running around. +In the kitchen, tea is prepared for the guests, which is served in typical, bulging Armadu glasses. +Mirzayeva's white teeth stand in perfect rank and file; beneath her headscarf she conceals long, dark blond plaits, which her son proudly reveals for us. +I have always washed my hair with milk, and it has never fallen out or lost its colour. +"I have never used shampoo either," said Mirzayeva. +Monthly pension is enough to live on +She has only ever eaten what she could get from her own farm - tomatoes, potatoes, peas. +My whole life I have never once bought groceries in the supermarket. +Then she tells of her husband who was in the army. +Things were at their worst during the time after the Second World War. +However, everything became better when the "beloved father" Heydar Aliyev took the rudder. +The propaganda seems strange coming from the mouth of an old lady. +Yet the cult that revolved around the father figure for the nation, who governed his country like a dictator, practically knows no limits in Azerbaijan. +He held power until 2003, and his son Ilham later took over the helm. +At least there is no deprivation among Azerbaijan's elderly. +Mirzayeva receives 230 Manat (around the same sum in euros) per month as her pension, which in a local context is an amount on which one can live comfortably. +And perhaps Mirzayeva's long since greying son is right: "The elderly enjoy a deep respect in our culture." +They live among their extended family, are loved, cared for and are happy. +If this is not a reason to live for as long as possible, then what is? +The notion of "human rights" is omitted from the constitution. +The revolution has returned to Cairo. +Competing demonstrations in Cairo reveal the deep division within the country. +The future constitution based on Sharia law is fiercely disputed. +The Egyptian President is not holding back his emotion. +We must make the transition. +"And making sure it succeeds is my responsibility, before the people and before God," he said on state television. +His speech was aimed at the entire population, however in particular at the Coptic Christians, the liberals, the enlightened and the secularists. +All of them, until now hopelessly estranged in a bewildered opposition, are fearful.
+They are fearful of a God State on the Nile at the mercy of the powerful Muslim Brotherhood. +According to Mursi, speaking almost apologetically, he has temporarily restricted the authority of the constitutional court and increased his own authority, "in order to rescue the revolution." +However, Egyptians - and the world - are not entirely sure what the 61-year-old engineer, who holds a Doctorate from the American University of Southern California, really wants to save. +Should the judiciary be deprived of power? +In actual fact, the 234 articles, which have been pushed through by the Islamic-dominated 110-person Constituent Assembly, are in some aspects cause for concern. +As was also the case under previous constitutions, under the draft constitution, the judicature is justified on the "principles of Islamic law." +But what are these principles? +This was and remains subject to interpretation and there is concern that the Islamists will make use of the woolly formulation and the resulting room for legal manoeuvre in favour of a stricter interpretation of Sharia law. +This is at least suggested by one newly added article: in all issues affecting Sharia law, the Al Ashar University must be consulted, the country's most important Islamic institution, which has great influence throughout the whole of Sunni Islam. +This can, but does not necessarily have to, mean that the clergy oversee legislation, which would result in the de facto incapacitation of the judiciary. +Much in the constitutional draft is open to interpretation +Also problematic: military jurisdiction over civilians will continue to be upheld. +During Mubarak's rule, these courts served to suppress opposition. +Following the fall of the dictator, up to 11,000 civilians were held under military imprisonment. +According to the draft, the state should also protect "the true character of the Egyptian family, and promote its morals and values." +From a legal perspective, this is formulated in such an unclear manner that state institutions could even use this article to control the content of cinematic art and literature. +In plain language, this is nothing other than censorship. +Incidentally, no article explicitly establishes the equality of men and women. +Another article does prohibit insult or slander of the prophet Mohamed and his emissaries. +However, what constitutes an insult and how this should be sanctioned remains unclear. +Equally dubious is the formulation stating that "insulting people" is forbidden. +Is a caricature of the president sufficient, or a joke at the expense of a jurist? +Open to interpretation, like so much in the draft submitted by Mursi to be signed and that, in his own words, will be submitted to Egyptians for a referendum "very soon." +"The revolution is back" +For weeks the opposition has been gathering to combat the strength of the Islamists. +Tens of thousands gathered on Friday evening at the Tahrir Square in Cairo in unfamiliar unity, and pledged to bring down the charter before it has even come into effect. +"The revolution is back and we are going to be victorious," said Sabbahi, third-place candidate in the presidential elections. +Nobel Peace Prize winner and former Head of the International Atomic Energy Authority Mohamed El-Baradei explained that the constitutional draft belongs "on the rubbish tip of history." +Via the short message service Twitter, he accused Mursi's followers of wanting to lead "a coup against democracy." +"If he calls for the referendum, we will go to his palace and overthrow him," said member of the opposition Jasser Said. +"We have not yet grown tired, the blood of our brothers has not yet been atoned for," stated the Egyptian media, quoting opposition politician Chaled Ali.
+And several judges have signalled that they do not want to oversee the referendum, which would render it invalid. +"The Koran is our constitution" +The well-organised Muslim Brotherhood gathered for a counter-demonstration, although acting cautiously they did not choose the Tahrir Square but rather a mass prayer on the other side of the Nile, outside the Cairo University. +Many veiled women and followers of the Salafis took part, shouting out: "The people demand the application of God's law." +They demanded of Mursi: "Cleanse the country!" and protested: "The Koran is our constitution." +A struggle for control over the symbolic Tahrir Square, where everything began, would have most likely provoked events verging on civil war. +Quite clearly, this was something that Mursi's followers did not want to risk. +The Muslim Brothers stated that both those against and those in favour of the constitutional draft had expressed themselves loud and clear. +Now is the time for the population to decide at the ballot box in which direction the country should move forward. +It is a certainty that there is a majority in favour of the Islamists' draft. +"The term 'human rights' does not even appear once" +Hafez Abu Saeda is furious about this forced constitutive process, which actually should have lasted until February and should have involved all social interest groups. +The 48-year-old human rights lawyer and Chairman of the Egyptian Organisation for Human Rights (EOHR) defended the Muslim Brotherhood when its members were imprisoned or in court under Mubarak. +Not because he shared their world view, but because for him, human rights are indivisible. +For this he was battered and imprisoned. +"And now the term human rights does not even appear once in the new constitution," he bemoaned in a discussion with "Welt am Sonntag." +The lawyer has resigned himself to Mursi extending his power to all three branches of state government. +These measures are blatant breaches of the ground rules of democracy and will guide Egypt into a new dictatorship. +"Instead of strengthening the civil society, the President is effectively suspending it," complained Saeda. +Yet without civil society organisations, a democracy cannot function. +Saeda feels abandoned by the international community, which is observing the battle over the ideological direction on the Nile with a mixture of curiosity and excitement. +This could come back to haunt them. +A demonstrator at the Tahrir Square warned: "You are letting loose a monster that you can no longer control." +Norway's rakfisk: Is this the world's smelliest fish? +Norway's five million people enjoy one of the highest standards of living, not just in Europe, but in the world. +Could the secret of the country's success be connected to the local appetite for some exceedingly smelly fish? +Take a selection of over-ripe cheeses. +Place them in the midst of a pile of wet sports kit. +Leave for a week. +Now you have the nose-numbing smell of rakfisk, one of the great Norwegian delicacies. +I am in the small town of Fagernes, about three hours from Oslo. +There is snow, scenery - and that odour, ever present, hangs in the air. +Rakfisk is trout sprinkled with salt and fermented in water for - depending on how smelly you like your fish - up to a year. +As the dark sets in and the weather turns cold, Norwegians flock to a festival here in Fagernes devoted to this most, well, captivating of foods. +"You eat it raw, and then swallow a glass of aquavit," says Havard, a local firefighter but also the so-called "Rakfisk General," in charge of running the festival. +All around us people are eating little cubes of the fish and knocking back quantities of drink.
+"Some people like the aquavit more than the rakfisk," says +The drink can kill +try a few pieces. +If you can avoid passing it under your nose, it is not bad - not unlike a of sushi that has been on rather a long bus journey. +Rakfisk is a product of very different, poverty-stricken times in Norway when, pre-refrigeration, fish was soaked in airtight barrels of water and salt in autumn. +Then in the depths of winter, well and truly fermented, it is taken out and - no doubt with the senses knocked out by alcohol - eaten. +Only a generation ago, thousands Norwegians were forced to leave their country in search work, emigrating mainly to the US. +Now the population is expanding fast - more than 13% are immigrants, attracted by plentiful jobs, high wages and a comprehensive care system. +People from Sweden, the old rival and not so long ago far richer than Norway, stream in to work. +Rakfisk is seen as signifying something important, a vital if rather smelly part of Norway's past. +It is among the more expensive dishes you can buy. +But then everything expensive - a small glass of beer or sandwich knock you £9 ($14) +Norway does not often make it on to the global news agenda and most seem to like it that way. +People here are still loath to mention name Anders Breivik, the right-wing, racist extremist who gunned down and killed 77 men, women and children last year. +Instead, the shootings are referred to as "the July 22nd incident." +Norwegians find it very difficult to believe that in peace-loving country one of their own was capable of such brutality and murder. +The growth since the early 1970s of one of the world's biggest oil and gas industries lies behind much of Norway's present-day wealth. +"But oil is not the only reason we are doing so well," says our handing round trays of rakfisk and, with her long blond hair and startlingly blue eyes, the image of Nordic well-being. +We are a - how you say - prudent people. +Her English, like that of most people here, is flawless. +are not very showy, we do like ostentation. +Norway has handled its oil wealth carefully - all but a small percentage of money from the industry is invested in a special fund for the benefit of future generations. +When everyone else was throwing around money they did not have, in the leading up to the global financial crash, Norway kept its purse strings tightly bound. +"As long as we can ski in winter and go hiking in summer we are happy," says Anna. +"And eat rakfisk," she adds with a carefree laugh. +I stand in the snow and queue for something to eat - I have had rakfisk. +Now an elk burger is certainly something and rather succulent to the taste. +But in the evening, it is more of that smelly fish. +The hotel I am staying is one of a number of venues hosting a rakfisk dinner where guests vote on the - or perhaps the most nasally challenging - fish. +There is a live TV up to a compere in a bow tie surrounded by plates of rakfisk. +It is like Eurovision song contest. +"What score do you have for the best fish up there in the mountains Thor-Juergen?" +"Here are our points, Havard." +There is clapping, laughter. +A man falls off his chair, perhaps overcome with aquavit. +Or maybe it is the fumes from all that fish. +Mexico's Enrique Pena Nieto faces tough start +As Mexico's incoming President Enrique Pena Nieto prepares to take office, the BBC's Will Grant looks at the challenges facing him and the mixed expectations of his population. +Traffic in Mexico City is particularly bad at present. 
+A congested city at the best of times, a ring of steel has been erected since Monday cutting off several key routes into the capital and causing chaos on the roads. +The aim, however, wasn't to stop commuters getting to work but to prevent protesters from reaching parliament. +On Saturday, Mexico's new president Enrique Pena Nieto will receive the presidential sash and take over the running of the nation. +He faces a complicated task. +Mexico has been performing well economically under the outgoing administration of Felipe Calderon, but the country is in the grip of a drug war, which has already claimed an estimated 60,000 lives in six years. +"My government has a great commitment to the Mexican people to reduce the violence," Mr Pena Nieto told US President Barack Obama in the Oval Office earlier this week. +I will be proposing a new security strategy which will allow us to achieve that aim. +Before rubbing shoulders with the US president, Mr Pena Nieto's previous political experience was as governor of his home state, the State of Mexico. +A populous, sprawling state surrounding the capital, opinions about the new leader are divided in his old stomping ground. +A straightforward man +In the bucolic town of Valle del Bravo, for example, he is remembered fondly. +Residents credit him with boosting tourism in the resort and building infrastructure. +To reach the town you can drive along one of Mr Pena Nieto's new motorways, a vast improvement on the cracked and bumpy roads it replaced. +Plaques bearing his name also hang outside a modern sports centre and an impressive interactive museum about climate change. +"We are looking to him to bring about real and lasting change," says friend and political ally Gabriel Olvera Hernandez, a state congressman for Mr Pena Nieto's party, the PRI. +Particularly in terms of security and the economy, we're hoping for an interesting and true change which our country so badly needs. +After an unbroken 81 years in power, the PRI was ousted in 2000 by Vicente Fox. +Congressman Olvera admits that after 12 years outside the presidential palace of Los Pinos, there is much expectation within the party about Enrique Pena Nieto. +And he rejects the opposition's characterisation of the new president as lacking substance. +He's a very straightforward man, very committed, with an excellent vision of the country. +He's an excellent statesman and, above all, someone who knows how to listen. +But on the other side of the state, that is not the impression many people have of their former governor. +In Nezahualcoyotl, also known as Ciudad Neza, the contrast with the cobbled streets of Valle del Bravo couldn't be sharper. +Tucked away under motorway flyovers, it is in many ways a suburb of Mexico City itself. +And the problems in the municipality are also gritty and urban. +Earlier this year, the military was called in to help tackle the drug gangs operating in the neighbourhoods, and violence against women is particularly acute. +On a patch of wasteland by a vast landfill site, the bodies of dozens of murdered women have been dumped over the past two years alone. +More than 1,000 women were killed in Mexico State while Mr Pena Nieto was governor, a rate much higher than in the notoriously violent city of Ciudad Juarez - a place synonymous with the murder of innocent women. +Mr Pena Nieto's critics say, at best, he failed to adequately address the problem of femicide while he was in office.
+At worst, they accuse his administration of turning a blind eye. +In a concrete home typical of the rundown neighbourhood, Irinea Buendia struggles to fight back the tears as she shows me photos of her late daughter, Mariana Luna. +According to the official version of events, Mariana committed suicide in 2010. +However, her family believes she was murdered by her partner. +"When I arrived at her house it seemed her body had been washed," Senora Buendia recalls. +There were signs she'd been beaten, and rigor mortis had already set in. +As her mother recounts the story, a picture of Mariana looks down from the walls, next to a cross bearing a single word: Justice. +However, that is exactly what the family say they have been denied. +The state authorities have treated me like I'm an old gossip, a trouble-maker, a whiner. +What they want is that one simply accepts what they say and shuts up. +"But that can't be right when there were so many irregularities and omissions," she says. +As President Pena Nieto receives the sash on Saturday, it comes with a heavy responsibility. +Tens of thousands of families have been affected by violent crime in Mexico over the past six years and the president has promised to make them a priority during his time in office. +"I hope he's the same kind of president as he was a governor," says PRI Congressman Olvera in Valle del Bravo. +That, however, is exactly what victims' families in Ciudad Neza fear. +Bradley Manning didn't complain about mistreatment, prosecutors contend +Prosecutors try to counter Bradley Manning's claims of abuse in confinement +The hearing focuses on Manning's time in the military brig at Quantico, Virginia +Defense wants the case dismissed on grounds that Manning's confinement was harsh +The Army private is accused of stealing thousands of classified documents +Prosecutors tried to establish Friday that Army private Bradley Manning -- charged in the largest leak of classified material in U.S. history -- missed multiple opportunities to complain about the mistreatment he's alleging in military custody. +While cross-examining Manning at a pre-trial hearing at Ft. Meade, Maryland, prosecutor Ashden Fein asserted that records of weekly visits Manning had with unit officers during nine months of detention at Quantico, Virginia, show no complaints about treatment. +The cross-examination -- during a hearing on a defense motion to have Manning's case dismissed on grounds that his confinement has been harsh and has amounted to enough punishment -- came a day after Manning testified that he had considered suicide while in custody. +The Army analyst, arrested in June 2010, is accused of stealing thousands of classified documents while serving in Iraq. +The material was then published online by WikiLeaks. +WikiLeaks has never confirmed that Manning was the source of its information. +In Friday's hearing, Fein reviewed with Manning the forms that officers filled out after meeting with Manning during his detention at Quantico's brig, where he was held under a heightened confinement status from July 2010 to April 2011. +Officers would ask Manning questions and write down his responses. +When Fein asked about the forms Friday, Manning acknowledged that he rated treatment by his guards as "excellent" and treatment by the facility overall as "very professional." +The forms show no complaints of mistreatment, even though the officers asked Manning directly about his treatment, Fein contended.
+Manning responded that he would verbally express concern about issues and that the visiting officers would talk through the concerns and indicate that they would be addressed, but they didn't record the issues. +"They would write down 'no issues' (after discussing the concerns), and it didn't necessarily mean I didn't bring something up," Manning said. +The judge, Army Col. Denise Lind, also asked Manning why he didn't complain about treatment during a January 2011 meeting with a board examining the suicidal thoughts he expressed in a form months earlier. +Manning replied that his intention during that meeting was to get his "prevention of injury" status downgraded. +The military said they put him on this restrictive status -- a step below suicide watch -- for his protection and the safety of others. +"I wanted staff to know I was fine, and (I wanted to) get off the POI status ... to enjoy an increased quality of life from my viewpoint," Manning said. +Manning testified Thursday about his arrest in Iraq and his transfer to Kuwait, where he was held for nearly two months before being transferred to the brig at Marine Base Quantico in Virginia in July 2010. +He said he contemplated suicide in Kuwait and once passed out there due to the heat. +He said not being allowed to know what was happening to him or in the outside world was distressing. +"My world just shrank to Camp Arafjon, to that cage," Manning said Thursday. +I thought I was going to die in that cage. +Once at Quantico, Manning said, he would spend most days in a small cell -- at least 21 hours and often more than 23 hours -- with no company. +Manning said he was allowed a mattress, blanket, flip-flops, some clothes and his glasses. +He said he tried to keep moving, because sleeping during the day or even lying down was against the rules. +Manning said he always slept with light from outside his cell in his eyes. +If guards could not see his face when he rolled over at night, he said they would wake him to roll back over. +Manning's lawyer filed a formal objection to Manning's treatment in January 2011. +Manning was moved to the military prison at Fort Leavenworth, Kansas, in April 2011. +Also Friday, the judge asked Manning about an allegation that he made in Thursday's testimony -- that after being forced to sleep naked one night in his Quantico cell, he was forced to stand naked in front of guards and others during a morning head count. +Manning had testified that he was never given a chance to cover himself with his blanket during the head count. +Under questioning from the judge Friday, Manning said that he inferred from his guard's order that he should drop a blanket that could have covered him, but he acknowledged that no one had ordered him to drop it. +Manning testified Thursday that he was forced to sleep naked that night because of his attempt to show an officer that he wasn't a danger to himself. +Manning said that he told the officer that he could have used the waistband of his underwear or his flip-flops to hurt himself but hadn't done so. +That night, Manning testified, his underwear, flip-flops and glasses were removed from his cell. +His lawyers hope the judge will at least take his experiences during confinement into account and sharply reduce his sentence should he be convicted at his court-martial, which is expected to begin early next year. +The defense has said it plans to have Manning plead guilty to lesser offenses and fight other charges as being extreme. +The hearing is scheduled to resume this weekend, with prosecutors expected to argue that the detention conditions were warranted.
+The Pentagon has maintained that Manning was held in accordance with rules governing maximum-custody detainees at Quantico. +Counts against Manning include aiding the enemy, wrongfully causing intelligence to be on the Internet, transmitting national defense information and theft of public property or records. +If he's convicted on all counts, he could face a life +My Mexican-American identity crisis +He says many were forced to leave Mexico because of the lack of opportunities there +Mexicans tend to fault those who left; they remind Mexicans of hard times, he says +Navarrette says Mexican-Americans are caught between two worlds +On a recent trip to Mexico City, I had barely made my way down the concourse arrived at the immigration processing area when I got stumped. +pointed the way to two for "Mexicanos" ("Mexicans"), another for "Extranjeros" ("Foreigners.") +I stood there for a few seconds, unsure of where to go. +Growing up in Central California, I had been called "Mexican" life. +It's ethnic shorthand in the same that my friends in Boston refer to themselves as "Irish" or my friends in New York describe themselves as "Italian." +Later, I settled on "Mexican-American." +But, this was Mexico. +And, in the homeland of my grandfather, there was no need for shorthand or hyphens. +I was simply an American. +I speak Spanish, enough to handle either end of an interview in language. +But I have the vocabulary of a native, and I can't shake American accent. +So I took my U.S. passport got in the line for Extranjeros. +I thought about that moment this week when Mexican president-elect Enrique Pena Nieto visited the White House to meet with President Obama. +On the agenda, as usual, when the leaders of these two countries meet: immigration, drugs and trade. +Pena Nieto was also eager to talk about the growth of the Mexican economy, is one reason that Mexicans are now just as likely to stay in Mexico as to the United States. +He wants to partner with the United States and Canada, and create a European trading bloc in North America. +And Pena Nieto vowed to Mexico's war against the drug cartels, even he no specifics. +For Mexico, the relationship with the United States is and filled hard feelings. +Most Americans probably never give a thought to the fact that, 1848, the United States invaded Mexico and forced leaders to sign over half their territory at the point of rifle. +But for Mexicans, who think in terms of centuries, not minutes, the are everywhere. +So minute that a U.S. official says anything the least bit critical of Mexico, you start hearing -- in the Mexican press, and among the elites -- complaints about how the Americans are encroaching upon their neighbor's sovereignty. +And the children of Montezuma go on the warpath. +And for Mexico, the really challenging relationship is with more than 35 million Mexican-Americans living in the United States. +You want to talk about hard feelings? +There is plenty. +Mexico has winners and losers, people for whom the country provides opportunities and others for whom it doesn't. +The only reason you have so many of Mexican ancestry living in cities like Los Angeles, Las Phoenix, Denver or is because, at some in our family tree, a person, maybe a parent or grandparent, who was shut out from opportunity in Mexico had to go north. +And more often than not, that person fit a profile dark skin, little education, from a poor village, etc. +We're their offspring, and we're loyal to them. +Not Mexico. 
+And even though we now be living the American Dream, gone to good schools and taken good jobs, we can never lose sight of the fact that it's the American Dream we're living, and not the Mexican one. +Our identity might sometimes be but our loyalty is clear. +It's to the United States. +Besides, we're aware that many of the elite Mexicans in the ruling class don't like us. +The feeling is mutual. +They see us as a reminder of a humiliating defeat and look on us as inferior that isn't sufficiently Mexican. +Our Spanish will never be good enough, our ties to Mexico never strong enough. +Our existence is, as they see it, all about failure. +If our families hadn't failed in Mexico, they wouldn't have left. +And we wouldn't now find ourselves trapped behind the silk curtain, living well in the United States but lost souls nonetheless. +My wife, who was born in Guadalajara and came to the United States legally as a child, reminds me that there is friction between Mexicans and Mexican-Americans because Mexicans have a firmer grasp of they are and Mexican-Americans resent that. +While she's a U.S. citizen, she sees herself as a part of two +Meanwhile, many Mexican-Americans I know don't feel like they're a part of either. +We love listening to the Mexican band, Los Tigres del Norte, but also to Bruce Springsteen. +You get the best of both worlds, but you're rooted in +In Mexico, we're seen as +And in the United States, we're considered Mexican. +Now, to complicate the relationship even further, as learned during my some Mexican leaders and parts of the intelligentsia want to reconnect with the Diaspora. +They want to put Mexican-Americans to work as makeshift "ambassadors" for Mexico, representing its interest in the United States. +We would tell our fellow Americans what a great country this is to visit and pressure political leaders to strengthen ties with Mexico. +Yeah. +That's not going to +Too many hard feelings. +And, income inequality and rampant corruption and drug violence, many of us are not so that it is a great country. +I'm afraid you're on your own, amigos. +That's fair. +If at least some Mexicans aren't yet ready to forgive the United States for how it treated Mexico a century a half ago, then they have to accept the fact that some Mexican-Americans hold a grudge for how their family members were treated much more recently than that. +Hmmm. +Maybe we're more "Mexican" than I thought. +Old battles, new Middle East +The ceasefire between Israel and Hamas could yet be an unlikely foundation for peace +Can there ever be a lasting peace between and Jews in Middle East? +Another round of bloodshed suggests that any such hope is vain. +Amid the usual futile arguments over who started it, scores of buildings have been reduced to rubble; more than 140 Palestinians, most of them civilians, and six Israelis have been killed; and, for the first time, missiles from Gaza have landed near Tel Aviv, Israel's metropolis, and the holy city of Jerusalem. +But though the Israelis and Palestinians seem stuck in their ancient conflict, all around them Middle East is changing. +The Arab spring pieces up in the air, and, like it or not, the Palestinians Israelis are caught up the regional turmoil. +Maybe this will make their struggle bloodier than +However, there are reasons for thinking it could just break lethal stalemate. 
+A war that is neither lost or won +At first sight, looks very hard to justify +Even if the ceasefire agreed on November 21st holds, this week's fighting has strengthened the hawks on both sides. +The leaders of Hamas, the Islamist that has ruled Gaza since 2007, will claim have forced the Israelis to back off, even though Gaza has taken a drubbing. +killing some of its leaders bottling up Gaza's 1.7m people in one of the most wretched and crowded corners of the planet, Israel has failed to Hamas. +Indeed Hamas is gaining on the West the other bit of Palestine currently run by its bitter in Fatah, the more moderate Palestinian faction. +Moreover, Hamas's leaders may well that is on their side. +As Islamists across the Arab world have gained clout, so Hamas has made powerful and rich friends. +Turkey, a resurgent regional power that was once closest Muslim ally, has taken up Hamas's cause; so has Qatar, one of the richest and most dynamic of the Gulf states. +Jubilant Hamas people say an Islamist crescent is curving around Israel, from Lebanon in the north, where the Hizbullah party-cum-militia holds sway, through Syria, where rebels of an increasingly Islamist bent may topple Bashar Assad, and on down through Jordan, where Hamas's allies are menacing the king. +Above all, on Israel's flank, the rise of the Muslim Brotherhood under President Muhammad Morsi in by far the most and pivotal of Arab countries, has changed the region's balance. +Hosni Mubarak, the secular despot who ran Egypt for 30 years until his downfall in 2011, had little time for Hamas. +By contrast, the Brotherhood is a cousin of Hamas, and its leaders more subject to popular opinion. +In future diplomacy Hamas may emerge as an actor that cannot be shut out even by Israel America. +Meanwhile, Israel's hardliners will draw the opposite conclusions. +military terms, Hamas has been put back in its box. +Israel's Iron Dome anti-missile system has its worth and many of Hamas's have been destroyed. +Israelis will sleep more soundly - for while. +In diplomatic terms, America is as as ever; many European countries also blamed Hamas for starting the latest round of violence. +Above all, Israel has prospered, especially Binyamin Netanyahu, a prime minister who has largely ignored the peace process. +Although rockets from Gaza have killed around 30 Israelis since 2004, Israel has been fairly free of suicide-bombers, thanks in part to the barrier that bites into the West Bank, main chunk a would-be Palestinian state, and protects the Jewish settlements that continue to expand despite their illegality in international law. +Mr Netanyahu, whose Likud party has with an even more hawkish lot under Avigdor Lieberman in the run-up to an election on January 22nd, is sitting pretty. +Why coddle those Palestinians by giving them a state of own? +If they really ran the West Bank, would they not fire rockets, just as their compatriots have done in Gaza? +Better to keep them behind that wall and smite them if they raise their heads. +Maybe the hardliners will win out; yet the Arab may change calculations. +Even if the Islamists taking power in Egypt and have little love for their priority will be difficulties at home. +Israel's defence budget is bigger than that of its four Arab neighbours combined. +Starting a war with the superpower will hardly help the new Arab governments mend their economies. +That the pragmatic Mr Morsi worked with Barack Obama to obtain a ceasefire augurs well - and might just mark the start of something. 
+too should look to the longer term. +With the rest of Arab world becoming more democratic, depriving Palestinians of their right to self-determination is creating a powder keg that is bound one day to explode in the territories occupied by Israel - much as a bus exploded Tel Aviv this week. +Repression is already undermining democracy the Jewish state, and exacerbates this as the Arab population +Bloody missions against Gaza every few years to knock back Hamas will exact a growing diplomatic toll. +Both sides prodding outsiders +The answer remains the one trumpeted by sensible people on both sides, most of the outside world and this newspaper: two states, with Israel ceding territory for security. +The hope a small one in the short term - is that the ceasefire will give a little more leverage to outsiders pushing that +Egypt, which must now set about stopping the flow of arms into Gaza, along Turkey and Qatar, is better placed than ever to persuade Hamas to accept the idea of a Jewish state based on the 1967 boundaries with land swaps a shared Jerusalem. +Arab outsiders should also press Hamas and to come together. +That do more to create a Palestinian state the imminent bid for virtual statehood at the +Mr Obama also has a in getting Israel to the table. +During his first term, he neglected to his own plan for peace. +Back in the White House, he is looking just as reluctant to be drawn in. +is woefully short-sighted. +America has a interest in a stable Middle East. +That means a peace settlement between Israel and the Palestinians. +plain packaging laws come into force in Australia +Smoking warnings and diseased body parts emblazoned on dull green boxes that are the same for all tobacco brands +Australia's world-first laws on cigarette and tobacco plain packaging have come into force, replacing brand logos and colours with generic drab olive coverings, gruesome pictures of diseased body parts and depictions of children and babies made by their parents' smoking. +Apart from the varying health warnings and images the only difference between the packs, mandatory from are the brand names, and these are all printed in identical small font. +It is the world's most strict regime for the packaging of tobacco. +Australia's federal government says the aim is to young people from smoking by stripping the of glamour. +It is relying on studies showing if people have not started smoking by age 26 there is a 99% chance they will never take it up. +"Even from a very early age you can see that kids understand the message that the tobacco company is trying to sell through their branding," said the federal health minister, Tanya Plibersek, citing studies that showed, for example, children linking a crown in a logo with the idea of being a princess. +While Australia has one of the world's lowest smoking rates and the changes will have little impact on multinationals' profits, other countries are considering similar steps. +The tobacco industry lobbied hard against the laws. +Tobacco firms said they would boost black market trade, leading to cheaper, more accessible cigarettes. +"There will be serious unintended consequences from the legislation," said Scott McIntyre of British American Tobacco Australia. +Counterfeiters from China and Indonesia will bring lots more these products down to sell on the streets of Australia. +Others say the laws have boosted their business. 
+Sandra Ha of Zico Import Pty Ltd, a small family business, said demand for cigarette cases, silicon covers mask the unpalatable packages, had shot up from almost nothing two months ago since British American Tobacco, Imperial Tobacco, Philip Morris and Japan Tobacco lost a challenge to the laws in Australia's high court. +Ha said Zico had sold up to 6,000 to wholesale outlets and was awaiting new stock. +This is good business for us. +The potential hitch, experts is the popularity of social media with the very demographic is targeting. +After a series of Australian laws banning advertising and sponsorship and requiring most sellers to cigarettes from view, tobacco marketing has moved online. +Australia has banned web advertising by local companies and sites cannot restrict overseas sites. +"If you are a marketer and you've only got this small window left to promote your products, online is the compelling place for to be in," said Becky Freeman, a public health researcher at Sydney University. +Freeman an increase in "average Joe" reviews of brands on social media sites such as YouTube, Twitter and Facebook. +We have to ask, is just a private citizen who really loves Marlboro cigarettes and they've gone to the trouble of making video, or is there a marketing company involved? +British American Tobacco Australia said the industry was focused on dealing with the new rules rather than marketing. +industry has gone as far as paying for Ukraine, Honduras and the Dominican to the new rules - the countries are at the World Trade Organisation that trade is being unfairly restricted, despite none of the countries having significant trade with Australia. +A WTO ruling is likely in mid-2013. +Plibersek said the government held discussions with other countries considering similar laws on packaging. +Canada was the first country to make photograph warnings mandatory in 2001. +They now extend to more than 40 countries including Brazil, Turkey and Ukraine. +Tougher laws are being considered in Britain, New Zealand, South Africa and India. +Many smokers in Australia remain defiant. +The don't affect me. +I just ignore them. +"You just grab a smoke and put it away," Victor El Hage as he purchased a pack with a photograph of a mouth tumour. +Honestly, there's only one reason I'd stop, and that's my little girl. +James Yu, who runs the King of the Pack tobacconist in central Sydney, said the uniform packaging made it harder to stack his shelves +"It used to take me an hour to unload a delivery, now it takes me four hours," Yu said. +"The government should have just banned them altogether then we'd go fine, we're done, we'll shut up shop," said, his hands up in the air. +In Constantly Plugged-In World, It's All Bad to Be Bored +spent five unexpected hours in an airport this Thanksgiving holiday when our plane had mechanical difficulties and we had to wait for another plane to arrive. +So I had plenty of time to think about the subject of boredom. +won't lie to you. +Half a day in an airport waiting for a flight is pretty tedious, even with the distractions of books, magazines and iPhones (not to mention duty-free shopping). +But increasingly, some academics and child development experts are coming out in praise of boredom. +It's right for us and our children - to be bored on occasion, say. +It forces the to go on interesting tangents, fostering creativity. +And because most of us are almost consistently plugged into one screen or another these days, we don't experience the benefits of boredom. 
+So should we embrace boredom? +Yes. +And no. +But I'll get back to that. +First of all, like many people, I assumed that boredom was relatively recent phenomenon, with the advent of more leisure time. +Not so, says Peter Toohey, a professor of Greek and history at the University of Calgary in Canada and the author of "Boredom: A Lively History" (Yale University Press, 2011). +"Boredom actually has a very long history," he said. +There's Latin graffiti about boredom on the walls Pompeii dating from the first century. +Then there's of how we define boredom. +The trouble is that it has been defined, and discussed, in many different ways, said John D. Eastwood, an associate professor of psychology at York University in Canada. +After looking over the research literature and putting the idea in front a focus group of about 100 people, Professor Eastwood and colleagues defined boredom as experience of "wanting to, but being unable to in satisfying activity." +What separates boredom from apathy, said, is that the person is not engaged but wants to be. +With apathy, he said, there is no urge to do something. +The core experience of boredom, he said, is "disruption of the attention process, associated with a low mood and a sense that time is passing slowly." +Boredom can sound an awful lot like depression. +But Professor said that while they can be related, people who tend to see the problem as the environment or the world, while people who are depressed see the problem as themselves. +Sometimes we think we're bored when we just have concentrating. +In their study, "The Unengaged Mind: Defining Boredom in Terms of Attention," which appeared in the journal Perspectives on Psychological Science in September, Professor Eastwood and his colleagues pointed to an experiment in which participants listened to a tape of a person reading a magazine article. +Some groups heard a loud and unrelated television program in the next room, heard at a low level so it was barely noticeable, while the third group didn't hear the soundtrack at all. +The ones who heard the low-level TV reported more boredom than the other two groups - they had concentrating but were not sure and attributed that difficulty boredom. +When you're trying to focus on a difficult or engaging task, disruption of attention can lead to boredom, said Mark J. Fenske, an associate professor of neuroscience at the University of Guelph in Ontario and one of the authors of the study. +On the other hand, when you're doing something dull, "such as looking for bad widgets a factory line, distracting music can help you not be bored." +In fact, he we now know that squirming and doodling, often seen as a sign of boredom, can actually help combat it by keeping people more physically alert. +"Research shows that kids who are allowed to fidget learn more and retain more information than those who are forced to sit still," Professor Fenske said. +We all experience boredom at some points - my flight delay, a droning speaker, a particularly tedious movie. +But some individuals are more likely to be bored than others. +To help measure this, researchers developed a "Boredom Proneness Scale" in the 1980s. +The scale includes questions like, "Many things I have to do repetitive and monotonous," and "I have so many interests, I don't have time to do everything." 
+Using such scales, researchers have discovered that boys tend to be bored more often than said Stephen Vodanovich, a professor of psychology at the University of West Florida, especially when it comes needing more, and variety of, external stimulation. +But in general, teenagers are a pretty jaded lot. +In 1991, Reed Larson, a professor of human and community development at the of Illinois, conducted an experiment in which he contacted almost 400 teenagers and their seven to eight a day by beeper. +He found that 32 percent of said they were bored in school and doing homework, while 23 percent said they were bored when they weren't in school. +On the other hand, 3 percent of parents said they were bored. +Larson he did not whether boredom percentages now, 21 years be higher or lower. +But he said he did know that "adolescence is a peak period for largely because children and teenagers are not given a lot of over what want to do. +So back to my original question: Is boredom good for +Sometimes no, in its extreme it can lead to take absurd physical risks, gamble or in substance abuse as a way to it, research shows. +On the other hand, many philosophers and writers discuss the connection between boredom and creativity, said Professor who has been studying the issue for more than two decades. +"Boredom is the brain's way to tell you you should be doing something else," said Gary Marcus, a professor of psychology at N.Y.U. +But the brain doesn't always know the most appropriate thing to +If you're bored and use that energy to play guitar and cook, it will make you happy. +But if you watch TV, it may make you happy in the short term, but not in the long term. +So if child is and you give him an iPad, not be bored anymore, but he hasn't learned how to entertain himself, or self regulate, Professor Fenske said. +And "that self-regulation transfers from one situation to other," he said. +Your doesn't just learn entertain himself, but gets more self-control in other areas. +I don't think we really want to celebrate boredom. +Nor should we be too of it. +Rather, our goal should be to feel comfortable away from the constant of activity and technology. +Professor Eastwood agreed. +"We frame it as we need to be bored more, but boredom is an agonizing, restless desire to be connected with something meaningful," he said. +What people are really searching for, he is a way to unplug and enjoy down time. +"In an environment where we are constantly overstimulated," he said, "it's hard to find ways to engage when the noise down." +In Colorado, No Playbook for New Marijuana +Anthony Orozco, 19, community college student and soccer player in southeastern Colorado, is facing criminal charges for something that will soon be legal across state: the possession of a few nuggets of marijuana and a pipe he used to smoke it. +Mr. Orozco said that one day in September he and a few friends were driving in Lamar, on the plains near the Kansas border, when they were pulled over. +After the police officer found marijuana in the car, Mr. Orozco was issued a summons for possession and paraphernalia - petty offenses that each carry a $100 fine - and given a court date. +"We get treated like criminals," Mr. Orozco said. +But is he one? +In the uncertain weeks after Colorado's vote to legalize small amounts marijuana for recreational use, the answer in hundreds of minor drug cases depends less on the law than on location. 
+Hundreds of misdemeanor marijuana cases are already being dropped here and in Washington State, which approved a similar measure. +Police departments have stopped charging adults 21 years and older for small-scale possession that will be legally sanctioned once the laws take effect in the coming weeks. +But prosecutors in more conservative precincts in Colorado have vowed to press ahead with existing marijuana cases and are still citing people for possession. +At the same time, several towns from the Denver suburbs to the Western mountains are voting to block new, state-licensed retail marijuana shops from opening in their communities. +"This is evolving so quickly don't know what's going to happen next," said Daniel J. Oates, the police chief in Aurora, just east of Denver. +Regulators in Washington State are also scratching their heads. +And they are looking for guidance on how to set up a system of licenses for production, manufacturing, distribution and sales - all by a deadline of Dec. 1, 2013. +They say that Colorado, better or worse, is ahead of most states in regulating marijuana, first for medical use and now recreationally. +"Colorado has a more regulated market, so they will be a good guide," said Brian E. Smith, a spokesman for the Washington State Liquor Control Board. +no place or system, Mr. Smith conceded, can do more than suggest what might work. +"There's no real precedent for us to follow," he said. +Washington's law, called I-502, takes effect on Dec. 6, also leaves year of limbo during which the state licensing system not exist, but legalized possession will. +And there are thorny mechanical questions that must be resolved during that time, like how to balance the state's mandate of "adequate access" to licensed marijuana with its prohibitions on cannabis businesses within 1,000 feet of a school, park, playground or child care center. +"Nowhere will it be more difficult to site a licensed cannabis business than in urban areas, particularly in the Seattle metropolitan area," said Ben a spokesman for Center for Legal Cannabis, a recently formed research group. +On Nov. 21, Chief Oates in Aurora sent his officers an e-mail announcing that the city attorney would no longer be prosecuting small marijuana violations for anyone 21 years or older, and that the would stop charging people for those crimes "effective immediately." +Chief Oates said that the police enforce city codes regulating medical growers, and that they would pursue drug traffickers and dealers. +In northern Colorado's Weld County, the district attorney, Ken Buck, represents stricter view. +After the vote, said his office would continue pursuing marijuana possession cases, mostly as a to press users into getting treatment. +Right now, 119 people face charges of two ounces or less of marijuana, though many are facing other charges. +"Our office has an obligation to prosecute offenses that were at the time they occurred," Mr. Buck said in a statement. +The response has been complicated even in places like rural Mesa County, where voters marijuana +police in Grand Junction, the county's largest city, are no longer citing adults for possession of small amounts. +county's district attorney, Pete Hautzinger, supported that decision, but also decided not to dismiss all of the pending possession cases. +"I do not think I'm wasting my time to enforce the law until it changes," he said. 
+Although 55 percent of Colorado voters supported the measure, bringing marijuana into the folds of government and legal system was never going to be simple. +And the contradictory reactions across state lay bare a ambivalence among local officials about the state's big green experiment. +"It's a cultural with district attorneys, said Sean McAllister, a Denver lawyer represents marijuana defendants and is a spokesman for the National Organization for the Reform of Marijuana Laws. +"They spent so much of their lives prosecuting people that they still don't really accept that this is legal," he said. +As the first states to small amounts of marijuana like alcohol, Colorado and Washington are poised to become national test drug legalization. +As advocates and state officials plan for a new frontier of legalized sales, they are also anxiously awaiting direction from the federal government, which still plans to treat the sale and cultivation of marijuana as federal crimes. +Advocates for legalized marijuana are hoping the Justice Department yields. +Despite some high-profile arrests of medical marijuana sellers, the federal government has mostly allowed medical marijuana businesses to operate Colorado, Washington and 16 other states. +While drug agents will probably not beat down doors to seize a small bag of the drug, are likely balk at allowing state-regulated recreational marijuana shops allowed under the new laws, said Kevin A. Sabet, a former drug policy adviser in the Obama administration. +Several cities in Colorado are not waiting for federal authorities act. +Even before Election Day, some local governments approved moratoriums on any new marijuana shops, even though it will be about year before any can open. +Last week, the western city of Montrose took up a six-month ban, and is likely to pass it next week. +"We don't want put in a position where we license and then have a big federal issue," said Bob Nicholson, a City Council +Our community voted against this amendment. +We're looking at what the community voted for versus what the state voted for. +There's an awful lot of questions. +Petronella Wyatt: I was bullied out of Oxford for being a Tory +It not just today's university students who are attacked for their views +I can't remember a time when I didn't dream of a place at Oxford University. +Both my father and my elder brother had been at what I imagined was the world's greatest seat of learning, a modern-day wine-blushed Greek symposium encouraging the pillars of civilisation, free thinking and tolerance. +Yet, within two weeks of taking up my place at Worcester College in the late to read history, I'd packed my bags, precipitating the first scandal of my life. +My father broke down and cried. +Friends were baffled. +The Evening Standard diary claimed I'd quit because I objected to fellow having sex in the room to mine. +The writer A N Wilson announced waggishly that I'd departed because I was forced to drink out of chipped mugs. +The was less droll. +I ran away. +Yes, ran, because I had been subject to bullying and intimidation. +Not on account of my rather outré name, or the fact that I came from a private school. +I was persecuted for one reason only, and in this cradle of supposed enlightenment it was both bigoted and barbaric: father, the late Woodrow Wyatt, was a high-profile adviser to Margaret Thatcher and I was Conservative supporter. +Why bring up now, you might ask. 
+Well, recent reports suggest that a new generation of Right-of-centre students are suffering a similar persecution. +Such is the increasing hatred of Tory students at Oxford that last week a group of them demanded the same equal-rights protection as gays, disabled people and ethnic minorities. +Conservative members of Corpus Christi College's junior common room (JCR) claim they are "often actively isolated, personally attacked and made to feel unwelcome" because of their political views. +They want to create a post on the college's equal opportunities committee to ensure that their opinions can be aired freely. +Their situation wasn't helped by a BBC Two documentary, Wonderland: Young, Bright and on the Right, about student politics, which portrayed Tories as oddballs and neo-Nazis. +It featured graduate Joe Cooke, former president of the Oxford University Conservative Association (OUCA), travelling in a Rolls-Royce, sporting a suit and silver-topped cane. +At other universities, students say they are being treated as "scapegoats" for the introduction of tuition fees. +Luke Black, 20, vice-president of Nottingham University Conservative Association, told a Sunday newspaper that "there is a growing Left-wing bias at universities. +People assume we are like the Bullingdon Club without meeting us." +Samuel Roberts, 21, a student at Corpus Christi, who proposed the motion for greater protection, says such a climate is "uncomfortable," while Stephanie Cherill, 19, president elect of OUCA, says there has been a deterioration in the attitude of JCR members towards people who are Right of centre. +"This poses a threat to the atmosphere of intellectual discussion, as well as to the welfare of members," she says. +I was in a minority of one during my few weeks at Oxford. +I had gone up in September, a cripplingly shy 18-year-old. +Hatred of the Conservative Party was at its most febrile. +The year before, the university had voted to refuse Margaret Thatcher - a former student - an honorary degree, because of cuts in higher education funding. +The atmosphere would have made a Stalinist shudder with apprehension. +During the first few days of freshers' week, when new students socialise with each other and the dons, I had a taste of the wormwood that was to come. +I was to find that the dons not only connived in the persecution of Tory undergraduates but took part with relish. +The politics of the miners' strike, privatisation and the government's opposition to sanctions against apartheid South Africa were brought into the wood-panelled rooms of the tutorial. +My first one involved translating 18th-century French into English, and I was unprepared for what followed. +"Miss Wyatt," said the don, Harry Pitt (now deceased), "please translate the first paragraph." +I stumbled over it. +A small man with a face like cake batter, Pitt was big on bile. +"Do Thatcherites refuse to learn French or are they just stupid?" he demanded. +The other undergraduates giggled. +Tears pricked the back of my eyes. +"I suggest you take some basic lessons in your spare time - that is, if you're not too busy socialising," Pitt snarled. +I walked to my rooms a disconsolate figure. +At dinner in college that evening I sat by myself; then I felt a light tap on my shoulder. +It was a second-year English student named James who introduced himself as a member of the OUCA. +"I know who you are," he said kindly. +I'm afraid it's like that. +Anyone suspected of being a Tory is picked on. +It's bad enough for me, but they know your father is close to Margaret Thatcher, so it will be worse for you.
+Most Tory freshers pretend they're Labour. +Later, at a local pub, I cravenly attempted to dissimulate. +I insisted I didn't agree with everything Mrs Thatcher said. +This ploy proved unsuccessful. +A first year student, who, ironically, had been to Eton, said: "You're the daughter of a fascist pig." +You're contaminated. +Other students took up the refrain. +I was perverted, dirty. +"How do Tories have sex?" one asked. +They beat each other, don't they? +I felt the way homosexuals must have felt before the liberal legislation of the Sixties. +Would I ever be able to lead a normal life at Oxford? +Would I be forced to meet like-minded people only after dark? +Would I have to turn to Labour and suppress my inclinations? +The three years before me stretched out as a purgatory of ostracism and isolation. +The only openly Tory don was Norman Stone, Professor of Modern History, who was based at my college. +He was hated for being not only a Conservative but a foreign policy adviser to Thatcher and one of her speech writers. +He was hardly ever there. +He loathed the place as provincial and petty, and for its adherence to the Marxist-determinist view of history. +In 1997 he took up a professorship at the University of Bilkent, in Ankara, Turkey. +"You won't be happy here," he told me. +I began commuting from Oxford to my parents' house in London, finding refuge with my more open-minded metropolitan friends and family. +I told my father I hated Oxford and why. +He was incredulous. +During his time there in the Forties, all political views had been accepted. +"But it's the best place in the world," he said pathetically. +They wouldn't do that, not among the dreaming spires. +Even my Communist friends always had impeccable manners. +His rheumy eyes began to cloud. +Give it a chance. +I'm sure it's all just a tease. +It would break my heart if you left. +Exhausted by my frequent trips to London, my emotional resistance was deteriorating. +A male friend of mine, also a Tory supporter, had succumbed to pressure and renounced his creed. +During a tutorial the following week, when another history don had suggested, in complete seriousness, that I was an "enemy of the people," I decided to do the same. +Inwardly blushing with shame, I admitted to being "brainwashed by my parents" and called them "old fools." +The respite was short. +It was my father who drove the nail into the coffin of my Oxford career. +At the time, he wrote two columns in the Murdoch press each week. +My door was locked. +I cowered inside, and after five minutes my pursuers gave up. +When they left, I packed a suitcase and caught the first train to London. +I never went back. +You may call me a snivelling wimp. +But no 18-year-old should be subject to such intimidation and vitriol in an educational institution. +Even more tragic is that it was Oxford, which not only produced 14 Tory prime ministers, but, to this day, hides behind an ill-deserved reputation for equality and freedom of thought. +"Valentino prefers elegance to notoriety" +On the occasion of "Valentino: Master of Couture," an exhibition that opened this week in London, ABC speaks with Naty Abascal, Fiona Thyssen-Bornemisza and other famous clients of the Italian designer. +Somerset House, former home of Queen Elizabeth I of England, is the only place in the British capital worthy of hosting the Valentino Garavani exhibition. +At the inauguration of "Valentino: Master of Couture," the designer acknowledged a retrospective apotheosis that brings together over 130 couture gowns created by his fashion house over the past 50 years. +"I love this palace" he says, in his unmistakable Italian accent.
+This exhibition is the culmination of a story whose only protagonist is "signore" Garavani, although it could not have been written without his distinguished clients. +Valentino has always been fascinated by the rarefied and distant world of the nobility. +In the first room of the exhibition, open until March 3, there are a number of private letters and photos signed by the cream of the aristocracy, from Princess Salimah Aga Khan, Lord Snowdon and Princess Marie-Chantal of Greece to Margaret of England. +Valentino exhibits these personal memories as if they were trophies of his social ascent from humble couturier in Voghera, northern Italy, to idol of the jet-set. +There is nothing wrong with loving royalty. +"At least they don't drop cigarette ends on your magnificent carpet, like some beautiful pop music celebrities do," says Baroness Fiona Thyssen-Bornemisza. +In the '60s and '70s, we both lived in the Alps and were good friends. +Valentino is a spectacular host who entertains with generosity and elegance. +"We all loved being invited to his chalet in Gstaad" says "Heini" Thyssen's ex-wife, a close friend of forgotten beauties such as Marella Agnelli and Eugenie Niarchos. +Valentino has always preferred elegance to notoriety. +And yet he is a star. +Valeria Mazza, wearing a Valentino. +The Argentine model Valeria Mazza also recalls the couturier's charisma. +Many years ago, after a fashion show in Piazza di Spagna in Rome, we went for dinner at his flat. +There were twenty of us, including Sharon Stone and John Kennedy Jr. +You could see and feel his "spirit" in every detail of the flat and its decor, the food and the music. +"All the guests were made to feel important and loved" recalls the top model, who started working with him during Haute Couture Week in Paris, in 1995. +"His designs are works of art and so never go out of fashion" she concludes. +Nobility parade +Garavani's life is not a story of obsession, but of well reciprocated love. +He loves well-educated people who come from good backgrounds, and they love him. +One of the Somerset House galleries has been transformed into a glamorous, sixty-foot long catwalk which offers a role reversal: visitors take the place of the models and have to parade down the catwalk while looking at a dream "audience" wearing Valentino masterpieces, for example, the dress Jackie Kennedy chose for her wedding with Aristotle Onassis, the costume Monica Vitti wore in "La Notte" and the wool and leopard-skin coat that belonged to Empress Farah Diba. +In this crowd of mannequins, names stand out such as Sibilla of Luxembourg, Gloria von Thurn und Taxis, Mette-Marit of Norway, Rosario of Bulgaria and Sofia of Habsburg. +Naty Abascal and the designer, in 2006 +Many of these clients say your first Valentino is like your first love, "impossible to forget." +I remember it perfectly. +It was a pair of trousers, a shirt, a "gilet" waistcoat and jacket from the 1971-1972 autumn-winter collection. +"It was a gift he gave me" says Naty Abascal, one of the designer's muses. +"I prefer him to other designers because of his femininity, his great love of women, and because he enhances our beauty" added the former Duchess of Feria. +I love the colours he uses, they really stand out and "lend themselves" to your face. +Their proportions are perfect. +The princess and fashion adviser Patricia Giovampaola d'Arenberg also remembers the first time she wore a Valentino. +As a teenager living in Italy, I dreamed of reaching the age when I'd have the chance to wear one of his evening gowns... +My time finally came in the late '90s.
+I bought my first Valentino dress to wear at a party in the castle belonging to my cousin, Prince de +It was a red dress, with a frilly skirt, draped "corsage" and a strapless neckline. +"It was a dream come true" says Princess D'Arenberg, the widow of Rodrigo d'Arenberg. +"Valentino is indifferent to fashion, his obsession is timeless" says this Italian aristocrat who lives between Paris, New York and Buenos Aires. +Princess D'Arenberg looks after her gowns with utmost care ... because a dress is not just a dress, it's also the many memories that go with it." +The "king" of fashion +The "grand finale" of the Somerset House exhibition is Marie-Chantal Miller's wedding dress from her marriage to Paul of Greece in 1995. +It took four months' work and 25 "girls" (as the designer calls his seamstresses) to create the pearl-encrusted, ivory-coloured silk gown with twelve different types of lace and a train four and a half metres long. +According to journalist Suzy Menkes, the leading authority of the specialist press, that dress represents a fashion milestone of the late 20th century, "the return of high society clients." +Dazzled for years with the "savoir-être" of the elite, Valentino is now its finest exponent. +Cavaliere di Gran Croce (the highest-ranking distinction in Italy), Cavaliere del Lavoro, Commandeur de L'Ordre des Arts et des Lettres, and awarded the Legion of Honour, Garavani accumulates as many honours as any of his clients' husbands. +"I've always been struck by his refined and calm manner, and by his neat and perfect appearance" acknowledges D'Arenberg. +The last time I saw him was a month ago at a gala dinner at the Orsay Museum. +He was on the table of Countess Jacqueline de Ribes, a great friend of mine. +"He was immaculate, time stands still for him." +If a princess says that... +The hardest job in the world: the human mules of Kawah Ijen +For four euros, the miners of the Indonesian volcano risk life and limb carrying 70 kilos of sulphur along steep stone paths. +There are people for whom work is hell, and others who - literally - work in hell. +This is the case of Anto Wijaya, one of the 400 miners who make their living taking sulphur from the Kawah Ijen volcano, east of the island of Java. +To do so, he has to descend every day to the bottom of the crater, where the sulphurous gas from the bowels of the earth solidifies on contact with air. +After breaking off large sulphur rocks, which in total can weigh up to 70 kilos, he carries them in two bamboo baskets on his shoulders along the steep stone paths. +It is only 250 metres to the top of the volcano, which rises to 2,386 metres above sea level, but the exhausted porters take over 40 minutes to get there, at snail's pace, keeping their balance and measuring their steps carefully to avoid slipping and falling over the precipice. +They know that one slip could cost them their lives, as happened to a French tourist who plunged to her death a few years ago on the hazardous Kawah Ijen cliffs. +The Kawah Ijen miners are paid 5 euro cents for each kilo of sulphur removed. +Once at the top, they make their way past the tourists who photograph them like circus monkeys and then, lugging their heavy baskets, they walk three kilometres to the scales installed by a mining company a little further down, 1,850 metres above sea level. +This is PT Ngrimbi Candi, a company which, since 1960, has been exploiting the volcano, and quite literally its workers, whom it pays 662 rupees (5 euro cents) per kilo of sulphur.
+It then sells the for 10,000 rupees (83 cents) to the petrochemical industry, as the mineral is widely used in everyday life and used in the manufacture of matches, fireworks, cosmetics, dynamite and even for whitening sugar. +"We generally carry 70 kilos, so we get about 46,000 (3.8 euros) a trip" explains Anto, who usually make three trips a day. +Each one takes three hours and end up exhausted, but it means he gets 138,000 rupees (11.5 euros) at the end of the day. +Although it a for such an inhuman effort, it is three times what he would earn in the field. +"Miners' wages are very high here, whereas coffee harvesting is paid 15,000 (1.2 euros) a day and the average monthly wage is two million rupees (167 euros) " explains the who previously worked as a mason in the island resort of Bali. +There, his wage was 75,000 rupees (6.2 euros) a day and the work was not as hard, but Anto has returned with his family to Banyuwangi, a village near the volcano, for a compelling reason which, in Indonesia, is as overriding as the sulphur: "I married a girl from Bali, where they are Hindu, and I've brought her to Java convert to Islam." +Anto has asthma, has difficulty breathing, coughs constantly and his eyes are irritated the toxic gases. +At 27 years old, Anto has been risking his life for three in the Kawah Ijen volcano, and the sulphur has to take its toll on even though covers his face special mask and +He has asthma, he has difficulty breathing, coughs constantly and his eyes are irritated the toxic gases from the +This is the price you have to pay to realise dreams. +"I'll go on working two more years because I to open a shop or study Spanish or French" he vows in more than acceptable English. +Punished for life, this pleasant, young man could be a tour guide, waiter or hotel receptionist, but instead he does the work of a mule. +Sharing a filthy wooden hut with other porters, he gets up every day at two in the morning because the sulphur doesn't stop flowing at when its characteristic yellow colour turns blue and it glows in the dark. +Defying the shadows, Anto descends the crater and lights the path with a small torch attached to the helmet he bought with his money. +Some 400 porters carry sulphur on their shoulders from the crater. +Despite their huge profits, the mining company has not mechanised the sulphur extraction process to save costs, nor has it provided any equipment for the porters, who work for themselves and by the kilo. +In fact, they do not even see any of the 30,000 rupee (2.5 euro) per camera surcharge that, top of the rupee (1.2 euro) entrance the guards of this natural reserve charge to tourists who come to photograph the volcano and their human mules. +"This work is for animals, people" Madrusin, a burly 42-year porter who has been working at Kawah Ijen for three decades, since leaving school. +He can lift up to 110 kilos, ensuring that he will go on working "all he can" because he needs the money to educate his three children, aged between 18 and 10 years old. +I won't retire, I'll die here because volcano has been my whole life. +Although the sulphur burns your throat and stings your eyes when wind suddenly changes and traps the miners in the thick columns of smoke coming out of the they are so hardy that no-one complains of serious apart, of course, from their common respiratory osteoarthritis, knee pain and sores the which have been misshapen by the weight of the +Balancing the on his back, Unainik can only carry 50 kilos now he is 53 old. 
+Every day, he and his fellow workers break off 15 tonnes of sulphur the volcano, which three lorries move to the warehouse in Tamansari, 18 kilometres away along a goat path that passes through scrubland. +"I won't retire, I'll die here because the volcano has my whole life" says Unainik, opening a mouth of gaps where teeth use to be. +The oldest of his children, 30 years old, also works carrying sulphur. +Time but poverty perpetuates from generation to generation in one of the hardest jobs in the world: the one done by human mules in the Kawah Ijen volcano. +Singapore seeks babies to save its economy +Singaporeans blame their stress and the cost of property and education for not having children. +"Singapore's population needs to grow." +I'm a patriotic husband, you're my patriotic wife, let's do civic duty and create life! +It may seem unlikely that these verses are part of an advert for mint sweets, but spite of this - perhaps because of it - the video went viral on YouTube in Singapore earlier this year. +The are part of a rap and make use of local references such as "Let's put a bao (bun) in the oven" make fun of the birth rate in Singapore. +The advertising company that made the video, BBH, is hopeful that the advertisement will manage to focus attention to the problem in a fun way. +Its creative director, Douglas Hamilton, says he wanted to use the power music to make people perform their "national duty." +It's purely Internet thing, so we had to make it fun and amusing. +It's the biggest problem facing this country. +We are the world's worst at reproducing our own progeny, so we felt it was issue we had to address. +We knew the Government had tried many like launching perfumes with pheromones or organising speed dating evenings. +of these ideas may have been creative, but they didn't necessarily work. +So we thought: why not be as creative possible to solve the problem, by composing a rap? +1.2 children +But the Singapore Government is not taking it so lightly. +It spends USD 1,300 per year on policies to encourage to have more children. +A government package for marriages and parents grants up to USD 15,000 per child, maternity leave and distributes tax benefits. +But this has had little effect. +Singapore is a rich, high technology city State in Southeast Asia, also known for the conservatism of its leaders and its strict social controls. +The birth rate in Singapore, according to its national division, currently stands at 1.2 children per woman. +The last it was over 2, as replacement rate, was in 1976. +So why are Singaporeans not having children? +Tan Wei Ming, Director of Marriage and Family Policy of the National Population Division, said that it is a result of "better education" and "a wider range of career opportunities." +"This given people a wider range of options in terms of life goals priorities, beyond getting married and starting a family" he explains. +These changes in social norms have contributed to increasing numbers of people who are single, and delaying marriage and births, which has resulted in a decrease the birth rate in Singapore. +Meanwhile, an EU immigration policy aimed at dramatically increasing immigration cope with the population decline has created resentment among the local population. +In Singapore, there are where xenophobia against many new immigrants is widespread and thinly disguised, especially the Chinese who are criticised for keeping wages low and not integrating. 
+Increased immigration is also seen as one of the reasons why, last year, the Singapore ruling party experienced its worst election result since independence. +Since the election there has been an attempt to correct the problem, with highest taxes and levies for foreign workers. +Unexpected consequences +While a fall the birth rate has known effects on a nation's economic growth, tax revenues, healthcare costs and immigration policies, in Singapore's case there are also some unexpected consequences. +The Government is trying not to build so many small houses. +For example, it has to influence the real estate sector. +Its urban authority has started to control the number of small apartments, known as "shoe boxes," which can be built in certain areas of the city. +These apartments have surface of 46 square metres and have very successful in terms of sales. +However, there concern that they may promote a single-living and discourage developers who want to build large family houses. +But, Lim Yew Soon, managing director of the real estate company EL Developers, says his "shoe boxes" sell much faster than larger units. +They are more popular, in the sense that the units sell days, even weeks, faster than larger units. +This they are much better for our cash flow. +However, he admits that the new regulations give clearer guidance to developers, who previously had obstacles put in their way if they provided for too many small units in a project. +Too stressed +Singapore is a city +these new rules may be a step towards increasing the national birth when talking to Singaporeans working in the central financial district, it they will much impact. +"People are very stressed, houses are expensive and so is education, a lot of people are off having a family" says a young executive. +Other people can have children. +me, it is important to have own money and time" says another young man of around 20 years old. +Men and women alike mention their careers, stress and the cost of property and as the reasons preventing them from having children. +So, much as the Government is trying to encourage its citizens to have children, when it comes to babies, the Singaporeans have the last +What is private offline is private online +Privacy. +According to the Spanish Royal Academy Dictionary, it means the quality of private life or "the level of privacy which a person is entitled to protect from any +What is privacy for an under 16? +How do you apply this definition to their daily life and social networks? +Do they understand the dangers they are exposed to by airing information over the Internet which they probably would not share offline? +ElPeriódico interviewed five aged between ten 15 years old who are frequent Internet +In four cases, they the term with "something very much mine" on a personal level, and "in the user name password" when to social networks. +"I wouldn't upload my secrets in a post" says Jorge, aged ten, when to explain the meaning of privacy on sites such as Facebook, Twitter, Hotmail and Windows Live Messenger, with which he has had accounts for two years. +"They are very secret secrets, I'll tell my mother, but not everybody" he says. +On FB I upload nice pictures or games. +And I have fun people I know. +"I wouldn't share a photo that isn't mine, that belongs to somebody who's doing something stupid" he says. +The child that it is bad to post obscene pictures of naked people, crimes, or write humiliating or aggressive comments. 
+Jorge says he knows the 35 friends he has on FB and his nine on Twitter. +Most are relatives. +His mother is included, and she has the password to one of the accounts. +I opened Twitter to express myself post interesting tweets. +"I don't know if they answer me, I only upload them" he adds. +networking is fun, I can talk quickly to relatives far away or my friends" says. +He does not hesitate to reply that he would never accept a request from an unknown person. +Nor would he take any notice of someone who recommends a stranger to him. +The case of Joseph, aged 14, is different. +This teenager has accounts with Hotmail, My Space and Ask, and in the last case he admits not knowing of the people added his friends +"It doesn't bother me, because we have something in common, like music" he says. +The boy says that no-one has suggested anything to him or asked him for his home address or phone number. +"If they pressured or asked me for I'd just delete them from account" he states. +Joseph became a follower on Ask, after reading a recommendation on Twitter. +This teenager is not alien to experiences of what is now known as cyberbullying. +An acquaintance of a friend of mine being pestered on a social network. +They were threatening him demanding money from him. +"I never found out who it was" he says. +The victim, according to José, did not close account. +"He just made it private." +He then explains a series of steps to the account safely. +Unlike Jorge, this would upload photos of acquaintances in uncomfortable or embarrassing +I do it if I didn't like somebody, or they made me want do it. +"However, I know that's cyberbullying" he +Key questions +Marielos Porras, an teacher with a degree in Education Learning, believes that to guide children and teenagers, they should understand that the purpose of social media is to inform. +"The Internet emerged as means of searching for information, but with the appearance of these websites, the rules of the game changed" he says. +Porras says the scholar Marc Prensky, a Master's degree in Education from Yale University and author of the work Digital Natives, Digital Immigrants, coined these terms to explain the phenomenon. +Digital natives are those children and young people born with technology. +"We are the digital immigrants who have to teach them, when we are still in fact learning" he +He says that the is complex, "because we are asking them to have a clear policy on what is appropriate or not to disclose, publish or divulge, an age at which maturity is not conducive to this." +"They also have to be selective when what matters most is to be popular and have thousands of friends, without thinking of the consequences" he adds. +According to the specialist, the most effective way to teach children and teenagers what privacy is, is through questions that make them +"Telling them not to do it is no good" he +Porras then some options: There are things you tell a stranger, so why do it online? +Or, would you like a friend to publish a photo of you like the one posted of a friend? +you know what others publish about you? +When tagging party photos, did you ask the other people's permission to tag them? +And one more question: does everyone need to know what you're doing all the time? +Another point is to make them see that they must behave as they offline. +The rules are the same. +"Outside the Internet, people act with respect, morality and other principles, so they should act the same way social networks" he says. 
+Monitoring +Stuart Guard, a university professor, primary school teacher and educational consultant, says it is essential for parents to read social policies thoroughly. +By understanding all the clauses, they have solid grounds talk to their children about the implications of opening online account. +"For example, the age at which you are allowed to share or publish" he says. +According to Guardia, it is important to remind children the "don't talk to strangers" lesson. +Unasur Summit closes without making public the Lima Declaration +The Sixth Presidential Summit of the South American Union of Nations (Unasur) concluded today in Peru without making public the Lima Declaration, previously announced and theoretically signed by the seven attendee leaders. +repeatedly tried to gain access to the document signed the UNASUR Meeting of Heads of State and but Presidential and Chancellery sources initially said they deliver it after the summit closed, but later they claimed that it will be published at some point on the Peruvian Government website. +When asked text, they pointed out that the content had disclosed by Peruvian President, Ollanta Humala, during a brief statement to the press. +Journalists' access to information from the Summit was restricted all times. +During the summit, in the press room, only video was aired, with no sound, showing the presidential meeting with the message "closed session, audio restricted." +The little information that reporters was given by the press spokesmen of some of the UNASUR governments attending the meeting, but the Peruvian Government. +The only released during the summit was the list of attending which angered hundreds of journalists from various national and international media, who more details. +The Peruvian President then sent an email to the media with the "final statement" of the summit, but this was Humala's statement, and not the official document that closed the summit. +Last October, Peru hosted the Summit of South American-Arab Countries (ASPA), and this time, repeated requests from the press, the previously announced Declaration again not made public. +The ASPA official website confirms that the document was last Tuesday. +At international events, the Peruvian authorities were pains to ensure that there were broadcasting systems assured for all the journalists, but limited the obtaining of a maximum. +The summit also concluded with joint commitment of Chile and Peru to accept a by the Hague Court to adjudicate a border dispute between the two countries. +The Presidents Peru, Ollanta Humala, and Chile, Sebastián Piñera, met during regional event and confirmed that they respect the of the International Court of Justice (ICJ), which on Monday, at The Hague, will start to hear the arguments of both parties, in the lawsuit Lima filed against Santiago. +"We will obey and execute the order that currently defines the differences we are bringing before this international court" said Humala, together with Chilean counterpart. +"Chile has been, is and will remain a country that respects international law and peaceful resolution of disputes, treaties and international courts" added Piñera, greeting Humala with a handshake, alongside the flags of the two countries. +Confirmation of both presidents that they submit to the ICJ came after Colombia this week denounced the Bogotá Pact, whereby it accepted to submit to the judgement of this international court, following a decision on its maritime boundary with Nicaragua which regarded as seriously flawed. 
+The summit was held with the absence of the Presidents of Brazil, Dilma Rousseff; Hugo Chavez; Bolivia, Evo Morales; and Argentina, Cristina Kirchner. +Paraguay, which was suspended by in 2011 after the dismissal of former President Fernando Lugo, was not involved in the meeting. +Host President Ollanta Humala was responsible for opening the session in the closing the summit, just after noon in Lima. +The President read the final document which reported that 16 agreements were adopted and the action plans laid down for 31 projects between the South American countries, for a total of 17 billion dollars of investments. +Among the resolutions adopted, it was that UNASUR take "important steps toward the goal of a South citizenship, for which residence agreements are being extended." +He reported that actions are being implemented to improve "cooperation in the fight against insecurity and transnational organised crime, actions to make medication more accessible, low-cost Internet access in all areas of South America, and to deal jointly and with risks of natural disasters." +With Europe in crisis, "economic consolidation (in Latin America) should not have a triumphalist attitude but should to expand its productive matrix and glimpse a better future for its people" Humala added. +"We decided to focus on a group of 31 flagship projects that will improve connection areas of South America, especially in rural and border areas... uniting countries and creating new economic networks" said the Peruvian President in a message read out. +Among these projects, he mentioned that five are in Peru and are located in the transverse of its territory, between the coast and Brazil, and two focus on increased connection with Ecuador, although he no further details. +Also, the final document mentioned the political situation in Paraguay. +"We hope the electoral process in that country serves to reincorporate it in the Union of South American Nations," from which it is currently excluded. +The need for Latin America to remain a prosperous, peaceful and integrated nation, with good neighbourly relations, was another issue highlighted by the summit. +In this sense, the President of Colombia, Juan Manuel Santos, said before the start of the regional event that he expected to meet with his counterpart from Nicaragua, Daniel Ortega, on Saturday in to respectfully discuss the maritime dispute after the failure of the ICJ, questioned by Bogota. +"The day after (Saturday) I might have a meeting with President Daniel Ortega" Santos +"We will review all these paths, [which] are not exclusive, and the treaty with Nicaragua will a conversation with Nicaragua" he emphasised. +"With President Ortega, I hope I can say that we handle this in the most civilised and respectful manner possible" said Santos. +Santos Ortega are due to meet on Saturday in Mexico, where they expect to attend the of the country's new President, Enrique Peña Nieto. +Also, as part the summit, the bloc's foreign defence ministers met in advance to approve the 2013 Action Plan, which seeks to strengthen dialogue and consensus on defence in the region. +Argentina, Bolivia, Colombia, Ecuador, Peru, Brazil, Uruguay, Chile, Guyana, Surinam and Paraguay make up UNASUR, although the latter currently suspended. +Peru has the pro tempore presidency of the regional bloc. 
+"South America should learn from Europe to integrate citizenship" says Rafael Correa +The President of Ecuador, Rafael Correa, said today that the creation of a common citizenship is a goal that "South America, in this case, must learn from Europe." +Correa, who took part in the Eleventh Presidential of the Union of South American Nations (UNASUR) held in Lima, told Peru's state television that Europeans "killed one another in the Second World War" and other conflicts, "but now practically one country." +To this end, he defended the project to establish South American citizenship encouraged by countries of UNASUR. +"We have to achieve free movement of citizens and workers for any South American as is already the situation with members of the Andean Community. However, there are still reactionary sectors that want us to return to the past" he said. +The Ecuadorian President was also in favour of the restructuring of the Organisation of American States (OAS) under the premise of reducing the influence of the Anglo-Saxon states and taking into account those who have signed the Pact of San José human rights. +Those who speak with authority commit to anything, whereas we South Americans sign everything. +"It is incomprehensible that Inter-American Commission Human Rights is in Washington under US funding" he said referring to Ecuador giving political asylum to WikiLeaks founder Julian Assange. +Correa said he does not that decision because with it he has not betrayed his principles, but has respected his "deep democratic and human rights values." +He added that, at the time, "he had reasonable suspicion Assange would be extradited another country and that his case would be respected." +Additionally, he criticised the Swedish courts for demanding he be subject to questioning for sexual offence in his country, when "Swedish legislation itself dictates that he can be questioned via videoconference, which could done from the Ecuadorian Embassy in London." +Correa said that there is a risk of deterioration of Assange's physical and mental health. +"I have not spoken to him since he was at our embassy, but the ambassador informed me that he had a minor lung problem, nothing serious" said the Ecuadorian President. +What there is, is the danger that his physical and mental health deteriorate due to being locked in a small space without any outdoor exercise. +"That would complicate the health of any person" he added. +Correa said that the solution to the asylum granted to Assange in June by the Ecuadorian in London, through the issue of a pass that permits travel to Ecuador, in the hands of Great Sweden and the European legal authorities, and stressed that there have been talks with London to seek a solution to the imprisonment of the WikiLeaks founder. +We do not negotiate with human rights, we do not use that word in this case, but there have been ongoing discussions. +"The solution to this problem in hands of Great Britain, Sweden and the European authorities, because Assange's lawyer, Baltazar Garzon, is handling a series of cases in different European courts" he said. +And he felt that "if Britain says no to the safe-conduct it's over." +And Sweden, as its legislation perfectly well allows it do, and as it has done in other cases, questions Mr Assange at the Embassy of Ecuador in London, him via Skype tomorrow, this is over. 
+Correa took the opportunity to reassert himself as a defender of freedom of the press and stated that what he does not tolerate is "the mediocrity, dishonesty and lies that undermine the of expression." +"The greatest enemies of the press freedom are not evil and wicked politicians, but bad journalists depending on profit, blackmail and extortion" he said. +In that regard, he welcomed the fact it was no longer these journalists, "or the bankers or bourgeois and hegemonic countries that dominate Ecuador" and said that, if re-elected, he will "step up the revolution to continue on the same path and in the right direction." +Correa also supported the decision to maintain the veto on in UNASUR, at least until their next elections, arguing that the body "must firm and tolerate opportunism and a coup masked with legality" because this will in fact "destroy the legitimacy of Paraguayan democracy." +The Ecuadorian President also considered the "perfectly pertinent" desire of his Colombian counterpart, Manuel Santos, to now negotiate with Nicaragua the maritime boundary between the two countries, after the ruling the International Court of Justice in The Hague, in favour Nicaraguan maritime sovereignty. +For now that ruling is not being followed. +It is a problem a South country and a Central American one. +Conflict is inevitable, but overcome by the desire to walk together. +They need to be processed in comprehensive manner to overcome them and move forward. +Additionally, he in a sound conclusion to the boundary dispute opposing Peru and Chile in the same court and that "it is right for Latin America to refer to international if both countries agree to losing, however hard it may be." +With reference to the possibility of his standing as a candidate in the upcoming presidential elections in Ecuador a third consecutive term, he said he sees that possibility "with much optimism and joy, although at times is pretty hard." +Correa said that if he loses the elections in February 2013, he will retire from public life. +Personally, I've never been interested in power, but in situations as as those in Ecuador, socio-economic poverty can only be corrected by political power. +"My political movement believed that it was me who that probable victory, so we have to accept that responsibility" he said. +If I won, it would be my last period in office and then I would leave public life. +If I lose, likewise. +"It's a decision" he confirmed. +Correa also referred to Venezuelan Hugo Chavez's new health treatment in Cuba. +just with Venezuelan Vice President Nicolás Maduro and he tells me Chavez went for treatment that was already planned, routine treatment, and it was expected he would win the and return to Cuba. +"This does not mean a health relapse for President Chavez" he said. +In Lima today, the Ecuadorian Head of State attended the Sixth Summit of Heads of State and Government of the Union of South American Nations (UNASUR), which concluded with calls for greater regional integration to sustain progress, equality and security. +Deaths caused by AIDS are nowadays due late detection +Fabrizio was 21 years old when they confirmed his test result: HIV positive. +"It was like a bomb dropped he says, recalling the time of the announcement, which the doctor was trying to make "softer," apparently unsuccessfully. +The boy hid it from his family. +He decided to illness alone and began to learn about it; thanks to his efforts he has just celebrated his 43rd birthday. 
+He is undoubtedly one the oldest patients in HIV Unit of the Guadalajara Civil Hospital (CHG), where he arrived in 1994 after several battles with health. +Fabrizio has lived with the human immunodeficiency virus (HIV) for 22 years, hard to imagine in the early '90s, when there were many questions, few treatment options and a deal of stigma. +Then, even the of an IMSS [Mexican Social Security clinic refused to discharge him "because he had a +At that time, having Aids was synonymous with death. +Now it is possible to survive the syndrome and do so with quality of +However, many people are still unaware of their illness, and seek help when the virus has caused havoc, "exhausted" their immune systems and they are suffering from opportunistic infections. +31 years of the onset of AIDS around the world, at least since the first reported cases, "the great achievement at this time is that the life expectancy patients starting treatment in good time and the life expectancy of the general population is exactly equal" stated the head of the CHG HIV Unit, Jaime Andrade Villanueva, saying that this information was endorsed in April this year in a prestigious scientific journal. +Infectious disease specialist and expert in HIV/AIDS, Andrade Villanueva said that 2008 scientists had concluded that AIDS was not a death sentence, but that life expectancy and of life depend on of damage to the immune system that patients present they are diagnosed, with a higher life expectancy for non-drug users: up to 30 years for with CD4 count and 50 years for those reporting a 500 count. +In simple terms, this means that anyone HIV positive 25 years old, under these terms and "as long as they keep under control, can live with no problems to 75" said the interviewee. +To gauge progress, it should be remembered that the average life expectancy of Mexicans today is 76 years. +Although mortality has dropped significantly in recent years and, in the case of Mexico, the number of people of has fallen from 6,678 in 2007 to 4,862 in 2011 (UNAIDS annual report), it true since the advent AIDS, 60 per cent of patients in the national database have died. +In Jalisco alone, only people died in 2011, and there have been 187 deaths up to May of this year; however, we are assured that there has been universal access to antiretroviral drugs since 2005. +Why are do still deaths occur? +- I think the problem is to do access to treatment. +That's how I view and that's how it's been at our hospital. +For at least the last 12 we've had no shortage of medicine, problem is that patients arrive in an advanced state of illness because they are unaware of their HIV status, that to say, the stages of the disease. +He gave a compelling statistic: "Nine out of ten patients arrive when they already have an opportunistic infection, so what needs to be done to have a greater impact on overall mortality is to make earlier diagnoses and, therefore, offer mass detection tests for who needs them. " +Specialists and officials of the State Council of AIDS Prevention Jalisco (COESIDA) agree on this proposal, as do the patients themselves, such as Fabrizio, who came to be tested at a private laboratory, motivated because a friend had done so despite his young age, he was around in the AIDS era and had even suffered Kaposi sarcoma, a cancerous tumour that is one of the common complications. +Everything changes when you know you have AIDS. +Some people think they're going to die and don't want to know anything. 
+"If I'm going to die, I'd rather have a blow-out three times a week" they say, but not in my case. +The change was the better; I eat well, I I my drugs. +To date, his parents are only aware he had cancer. +I live as normal a life as anyone else. +"I work, I part in a lot of I travel, I an active but responsible sex life, I take care of myself and the other person" Fabrizio, who agreed to share his intimate secrets with MILENIO JALISCO, to motivate those people with his story who today, in the context of World AIDS Day, are afraid. +They should get tested if they are at risk. because the sooner they know if they are HIV positive, the better, and if they have already been diagnosed, they must learn to live like any other person, while being responsible. +This is his message, which summarises the theme of the fight against AIDS in 2012. +Condoms behind the counter. +The gaps between health programmes and ordinary citizens are huge, said Ricardo Salazar, a journalist from Guadalajara who has taken up the HIV cause. +And the greatest cure is prevention. +In places dedicated to this task "the distribution of condoms has actually increased; previously, they used to give us one or two, now they give a hundred, and that's fine, but it out there are still people out there have no access condoms" he said. +Among the most vulnerable to new infections are teenagers. +"Why do you want them?" is a common question, asked with sarcasm and according to the values of social workers, counsellors, pharmacy workers and healthcare staff who do not want to expose to sex, said the speaker. +It was decided to change such inefficient allocation, and that condoms should not only be placed behind but packets of one should found in public toilet dispensers in places frequented by young people. +This is not promoting promiscuity. +It is about paying for their beers motel fees, as Governor Emilio Gonzalez said, when asked if there would be distribution of condoms during his administration. +"And it's not about sexuality, but it is best to provide condoms to those already practising sexual activity" he said. +Jalisco key points +There are 13,435 cumulative cases (12,158 AIDS and 1,317 HIV). +The state is 4th in the nation in new and cumulative cases of AIDS and 13th in HIV. +92% of new infections are through sex, 6% via the bloodstream and 2% perinatal. +An estimated 50,000 people may be living with HIV, as for each registered case there around 4-5 people who not know they are positive. +Ratified by a United States court of appeal, a judgement which ignores the of the Vitro Group's debt achieved via a bankruptcy in Mexico, the scenario is an ominous precedent for any national company with offices in the neighbouring country that has solvency problems. +It seems, then, that the proceedings in support of survival of firms permit Mexican law are not valid in land of stars and stripes, contrary to international conventions. +In practical terms, the endorsement of the judgement delivered on 15 June by Judge Harlin Hale the Bankruptcy Court of the Northern District of Texas, leaves Mexican firms defenceless against possible seizure of their outside of Mexico. +However, the decision opens the door for the leading glass manufacturer in Mexico to appeal to the Supreme Court of the United States, claiming three inconsistencies. 
+From start, while the trial judge notes that creditors should be governed by United States the Court of Appeal for the Fifth Circuit, in New Orleans, states that the main action is the insolvency action handled in Mexico. +The first would involve ignoring international procedural cooperation in cases of insolvency of companies with transnational +Indeed, the UN Model Law for International Trade Law Uniformity was created for this purpose, with American Law Institute positioned as arbitrator. +Secondly, the judgement establishes that without the intercompany vote, with the debts the Vitro subsidiaries had with their parent company recognised in the critical mass of the the majority needed to approve the restructuring might not be achieved. +However, law recognises the possibility. +In fact, the Vitro case was the first one in which the scheme was accepted. +There are half a dozen examples, including Agremex and Commercial Mexicana, whose intercompany debts were endorsed by the Federal Bankruptcy Institute. +What is also certain is that, not including the votes of subsidiaries, the Vitro creditors who fought against it in the courts, namely "vulture" such as Aurelios Capital, Aurelios Convergence, Elliot International and Limited, did not achieve a +The was percent versus 37. +This data is omitted by the Court of Appeal. +From another perspective, the latter blames Vitro for the difficult situation it has faced since 2008, while trying to avoid the severe economic crisis faced by United States, turning its on the country. +For now, the Gonzalez Sada family firm has lodged a motion for reconsideration before the Court of Appeal for the vote to reach the plenary of the court, that is, the five judges, given that only three voted previously. +Should this fail, an appeal for review by a higher court, in this case the US Court, will be filed. +The real problem is that the court bypassed a document sent by the Government of Mexico in the capacity of amicus curiae ("friend of the Court"), which details the procedure followed by Vitro under the framework of the Commercial Insolvency Law, noting that the discharged itself with adherence to the agreements signed by the two countries to link it with Chapter 15 of the Bankruptcy Act of the United States. +Moreover, it should be noted that the country yielded to the principles the United Nations Commission on International Trade, is the rules set for cross-border insolvency cases, ensuring fairness for debtors and creditors. +Double whammy: Vitro hit and country hit. +Balance Sheet +With complaints put on the table by the unions of Mexicana Airlines against the former owner of the company, Gastón Azcárraga Andrade, who is accused of mismanagement, dormant for several months, the Airline Pilots Union Association already found the bottleneck. +The proceedings headed by Carlos Diaz Chavez Morineau has just filed a criminal complaint against the National Securities Commission, which is accused of obstructing +The claim is that the supervisory authority has consistently refused to provide to the Attorney General's Office on a transaction carried out by the employer remove 198 million pesos from trust F/589 of Banco IXE, on behalf of de Aviación. +The resources were apparently channelled towards the purchase of shares in the Administradora Profesional de Hoteles. +As you know, Azcarraga Andrade is the main shareholder of the Posadas hotel chain. 
+Opposing Dragon Mart +A group of local and foreign environmentalists, academics, businessmen and members of the public gathered at the weekend at a forum at the University of the Caribbean to approve the creation of a broad front to oppose the opening of the Chinese Dragon Mart in Cancun. +As you know, we are talking about a huge sales and distribution centre in Mexico, Central America and the Caribbean, selling Chinese products, with a residential area at the bottom for employees of 150 companies. +Previously, Canacintra had managed to unite the governors of the southeast of Mexico to oppose the building that destroyed part of protected area and represents the mother of all threats to industry. +The of ACTA +Government ignored an order of Senate to explain under what terms and conditions the Mexican Ambassador in Japan signed the Anti-Counterfeiting Agreement, known by its acronym ACTA, according to the Mexican Institute of Industrial Property, and the matter has been archived. +As you know, the action was taken even though the Senate had ruled out the possibility, deeming it an infringement of freedom of expression on social networks. +term +In effort to repay long-term debt without affecting short-term debt, the housing developer Homex is placing securities exchange certificates on the market for 500 million pesos. +The issue is the first of four identical issues which are offering to repay interest every 28 days. +Birth of Competival +A consortium under the name Competival has just been established, the companies NYCE, e-Quality and Kernet, leaders information technology, objective of which will be to market the services of software clusters in Central and South America. +Investments in this area exceed USD 1.5 billion. +Hector "Hetin" Reyes: has been my life" +Basketball globetrotter Hector "Hetin" Reyes was involved in the sport for over 60 years and, thanks to it, travelled the world. +Few people in Puerto Rico have a mental recollection of local basketball history as broad as that of Héctor "Hetin" Reyes. +Reyes was immersed in the sport for over 60 years before confined to a wheelchair in 2008 following a stroke; he was a minor league player, National Superior Basketball player, BSN representative and manager with the Bayamón Vaqueros or President of the Basketball Federation. +wore lots of hats in basketball throughout my life, including several at the same time, like when I was president of the BSN, general manager and federative president the Team during the '90s," recalled Reyes during Primera Hora's visit to his home in Bayamón, where he lives with Isabel, his loyal wife for over 50 years. +"Basketball has been my life." +Reyes is not exaggerating when makes that statement. +The walls of his house are almost totally decorated with pictures and memorabilia denoting his long career, which goes to prove it. +Bayamón at heart +Of them all, the ones he with the most emotion are the ones that remind him of his time spent with the Vaqueros, from the mid-50s as a player until 1982, when completed 15 years serving as co-agent or agent the franchise. +"Those were my best years, the ones I enjoyed the most because I had the opportunity to be part of the Vaqueros' eight championships, since 1967, either as agent, co-agent or manager. +There were many good years, the five consecutive championships from 1971 to 1975. +And then I said goodbye with one in 1981, Jerome debut year in the BSN. +"Cuco" Ortiz took over - he was a great manager" said Reyes. 
+I remember that Bartow, who had directed here and was at the University of Alabama (Birmingham), said to 'I've got a very strong player you, 6'7" tall. +Do you want him?' +And that the beginning of Mincy, one of the best players Puerto Rico ever had. +Bartow then recommended the sharpshooter Gausse Raymond, who established residency here and was one of our best shooters. +remember him saying that if Mincy had given Bayamon one championship, Gausse would help get another. +The Vaqueros' championship with Gausse was enjoyed, but from a distance, because in 1988 he was already becoming a federative bigshot. +that time, he preferred to enjoy his own and Mincy's accomplishments in the national team. +I remember we beat the United States for the first time during the 1989 Pre-Olympics in Mexico. +Then came the 1990 World Cup, where came fourth and it should have been bronze, but for the Canadian referee who made us repeat the final for the second time, said Reyes. +Is the 1990 World National Team the best you've ever seen? +It's one of the best, as good the one that beat the Dream Team in the 2004 Olympics. +However, my favourite was the one in the 1991 Pan American Games in Cuba, when we won gold and gave the US team a beating, which was quite similar to the time we won bronze at World Cup. +That team not only again included Gausse, Ramon Rivas, Fico López and 'Piculín' (Ortiz), but also the young (Javier) 'Toñito' Colón and James Carter, the Leon brothers Edgar) and Mario 'Quijote' Morales, who was kept out of the 90 team by a knee injury. +A team that maybe was not best terms of members, but which gave us a gold medal and was a great joy to work with, was the 1995 Pre-Olympic team in Neuquen, Argentina. +With role players such as 'Canito' Nieves, Pablo Alicea and the young Rolando Hourruitiner replacing the players suspended after shambles of the Mar del Plata Games, we won gold against all the odds. +Who was the best Puerto Rican player? +Without any doubt, Piculín Ortiz. +His numbers at international tournament level are awesome. +Nobody in Puerto Rico dominated at that like Piculín did. +Not to mention his career in the various he played in. +Who was the best Puerto Rican manager? +That's a one. +We had very good team, including Julio Toro, Flor Melendez, Carlos Morales, Raymond Dalmau, Armandito Torres. +Of the youngsters, I really like the work of Leo Arill. +What do you consider your greatest achievement in the federation? +Having been part of National Team's most glorious era between 1988 and 1995 and in the early 90s the BSN had up 17 teams in a season. +What was there left for you to +There were things I'd have liked to implement, such as regionalising the minor leagues. +For example, the boys of Ponce only play in their area and only get to face teams from other parts of the island in the national playoffs. +Right now the riding and playing too much, unnecessarily. +At least I see the of and a course for leaders, table officials and referees. +That pleases me. +What are you doing now? +most I do is listen to music, watch music videos from my era on YouTube, enjoy my grandchildren and occasionally go to basketball games. +And of course, enjoy the company of my wife, Elizabeth, who has always been with me. +Larry Hagman dies +Larry Hagman, born on 21 September 1931 Fort Worth (Texas), became world famous for role as John Ross Ewing, better known as "JR," in the television series "Dallas," in which he played a ruthless, malicious and manipulative businessman. 
+Larry whose role as oil tycoon JR Ewing in the television series "Dallas" became a symbol of greed in the 1980s, has died. +He was 81. +Hagman, who returned this year as JR in a new season of "Dallas," died on Friday afternoon of cancer complications, according to a family statement provided to the Associated Press by the Warner Bros., producer of "Dallas." +"Larry was back in his hometown of Dallas, once again representing the iconic role he most liked" the family said. +Larry's family and closest friends were with him in Dallas for the Thanksgiving Day holiday. +Linda Gray, who played wife in the original series and the sequel, was with Hagman when he died in a hospital in Dallas, said her publicist, Jeffrey Lane. +He brought joy to all who knew him. +He was creative, generous, funny, loving and talented, and I will miss him +"He was an original guy and lived to the full" Gray in a statement. +Hagman diagnosed with cirrhosis the liver in 1992 admitted that he had drunk a over the years. +In 1995 a malignant tumour as in his liver and he underwent a transplant. +Years before "Dallas," Hagman became famous on television as a decent guy the light "I Dream of Jeannie," aired on NBC from 1965 to 1970. +He played Captain Tony Nelson, an astronaut whose life is changed when he meets an attractive genie, played Barbara Eden, and takes her home to live him. +He also starred in two sitcoms that were not aired for long, "The Good (NBC, 1971-72) and "Here We Go Again" (ABC, 1973). +His work included roles received by critics in "The Group," "Harry and Tonto" and "Primary Colors." +But it was his masterful interpretation of delightfully detestable JR that led to Hagman reaching his peak of stardom. +The drama series on CBS about the Ewing clan and other characters in their orbit aired from April 1978 to May 1991. +The tagline "Who shot JR?," designed to generate hype around an episode full of emotions in which Hagman's character is nearly killed, international speculation and millions of risky dollars wagered in gaming establishments. +It also helped give the series a at the time. +When the answer was revealed in an episode in November 1980, an average of 41 million viewers tuned in and made "Dallas" the second most watched entertainment programme in history, after the final episode of "MASH" in 1983, which had 50 million viewers. +It was JR's sister-in-law Kristin (played by Mary Crosby) who shot him. +JR got her pregnant then threatened to say she was a prostitute unless she left town, but there were others who also had reasons to attack him. +Hagman portrayed Ewing as a corrupt insatiable man with a charismatic smile: a dishonest entrepreneur and cheating husband who tried to have his alcoholic wife, Sue Ellen (Linda Gray), sectioned. +"I know what I want on JR's tombstone" Hagman said in 1988. +It should read: "Here lies the honest citizen JR Ewing." +This is the only deal he lost. +Victoria Principal, co-star of the original series, recalled Hagman on Friday as someone "huge, on and off screen." +He is unforgettable and irreplaceable, for millions of fans around the and in the hearts of each one of us who was fortunate enough to know and love him. +Ten episodes of the edition of "Dallas" were broadcast a few months ago with great success for TNT. +He had already finished recording five for the second series and a sixth was in process, the reported. +Immediately after, there was no statement from Warner or TNT about how the would handle the loss of Hagman. 
+Hagman, born in Fort Worth, Texas, was the son of and singer Mary Martin, who starred in classics such as "South Pacific" and "Peter Pan." +was still a teenager when she had him in 1931 during her marriage to lawyer Ben Hagman. +He tried his luck in the New York theatre scene the early '50s, and later served in the Air Force from 1952 to 1956, in England. +While he met the young Swedish designer Maj Axelsson and married her. +The couple had two sons, Preston and Heidi, and lived for a long time in the Californian city Malibu, home to many celebrities. +In 2001, he called his memoirs "Hello Darlin': Tall Absolutely True) Tales About My Life." +"I didn't put anything in it that I hurt anyone or affect them in any he told Associated Press at the time. +After his liver transplant, he became an organ donation promoter and worked as a volunteer at a hospital, fearful patients. +"I advise them, encourage them, meet with them when they come for their surgery, and afterwards" he said in 1996. +I try to some comfort, such as "Don't be afraid, it will be a little uncomfortable for a short time, but then you'll be fine." +He was also an anti-smoking activist and took part in several campaigns. +Start of a course that explores the "End of the World" +Each week, students explore apocalyptic themes such as nuclear war, zombies, viruses and germs, and global warming. +This term, when Professor of religion, Stuart Charmé, decided to give a course on end of the world, he knew he had a compelling hook: The end of the "long of the Mayan calendar, 21 which had convinced many people that the end of the world was coming. +But Charmé had no idea what awaited him over the next couple of months: The cataclysmic hurricane Sandy, a fiscal precipice some called "debt Armageddon" and a growing conflict involving Israel, where end-of-the-world Christians theorists think the Apocalypse will begin. +"I didn't realise this was going to be the most apocalyptic term ever" said Charmé week to students at Rutgers-Camden University (New Jersey). +If you look at what has been happening in the world today as if we were at 30 days and this has been a really good period. +And remember that bad is good for those with an apocalyptic mentality. +And he is the only professor offers courses on the "end of the world" this term, theoretically last in history. +At Temple, Associate Professor Barry Vacker is giving the course "Media, Culture and the end of the world." +Each week, students explore apocalyptic themes such as nuclear war, zombies, viruses and germs, and global warming. +"We looked at why these ideas proliferate over time" he said, and how they offer hypothetical scenarios that guide human behaviour. +If nuclear material into the hands of for example, a war could break out. +This month students analysed movies with an apocalyptic theme and explored how they compare with real-life examples. +"I've tried to inform students about what is possible, probable, credible and impossible" said Vacker. +At the main Pennsylvania State University campus, Latin American History Professor Matthew Restall, and his Amara Solari, an Associate Art and Anthropology Professor, have teamed up to give a called simply "The end the world." +"We don't add '2012' so we always have the option of running course again, if the world doesn't come to an end" said Restall. +Despite the "impending doom," students to study, undertake projects and take final exams. 
+At Penn State, the final exam will on the eve of the Apocalypse, which students no choice but to work the very night the world is supposed to end" Restall. +The proved quite popular. +"It was fully booked within two hours" said Restall, on his course for students with high averages, which was filled with 35 students. +We received emails for weeks and weeks before the start of the term, from people asking if there were any places. +Students, meanwhile, say the course is one of the most interesting around. +"I find it fascinating to see what people do to themselves" said Bridgid Robinson, a 23-year-old post-graduate Religion and Sociology student from Haddonfield, New Jersey, at Rutgers-Camden. +And the secular or religious mentality is just a matter consolation or lack of it. +Will Wekesa, a 25-year-old post-graduate Psychology and Nursing student, said he had seen the apocalyptic movies. +"I'd never heard of a class that could teach it" he said. +I enjoy it. +But none the students interviewed - much less any said they believed in the end date of December 21st. +"Our first project was about the Mayan prophecy and to a certain extent we discredited it" said Julie Zeglen, a 21-year-old final year student at Temple, from West Chester. +The Mayans never predicted the end of the world: it is just a key point in the calendar, said Restall. +But he said that Western culture suffers from apocalyptic anxiety, which goes back several centuries, in which people react to changes them predicting the of the world. +The Internet has caused a boom these speculations. +"In other places, people don't think about it" he said. +It's mostly in the English-speaking world. +Joseph Dougherty, a Professor of religion at La Salle University, who is giving courses in the this year, responded quickly to the question of whether he knew about any courses the "end of the world" there. +"The Philippines are not taking part in the end of the he suggesting an exception a higher authority. +We have an indulgence from the Pope. +Restall noted that over the years there has been talk of many days of the last judgement, and said that if nothing happens on December 21st, "people will immediately start thinking of the next date" or philosophising that December 21st is beginning of a seven-year period after which the world will end. +Students and teachers are taking the date lightly. +Some said they plan to go to "end of the world" parties. +"Maybe I'll call some friends so we can have a laugh together" Samira Ford, 20-year-old student. diff --git a/tests/data/pred_real/train.real b/tests/data/pred_real/train.real new file mode 100644 index 000000000000..1ad39240fd0b --- /dev/null +++ b/tests/data/pred_real/train.real @@ -0,0 +1,3000 @@ +A Republican strategy to counter the re-election of Obama +Republican leaders justified their policy by the need to combat electoral fraud. +However, the Brennan Centre considers this a myth, stating that electoral fraud is rarer in the United States than the number of people killed by lightning. +Indeed, Republican lawyers identified only 300 cases of electoral fraud in the United States in a decade. +One thing is certain: these new provisions will have a negative impact on voter turn-out. +In this sense, the measures will partially undermine the American democratic system. +Unlike in Canada, the American States are responsible for the organisation of federal elections in the United States. 
+It is in this spirit that a majority of American governments have passed new laws since 2009 making the registration or voting process more difficult. +This phenomenon gained momentum following the November 2010 elections, which saw 675 new Republican representatives added in 26 States. +As a result, 180 bills restricting the exercise of the right to vote in 41 States were introduced in 2011 alone. +The new election laws require voters to show a photo ID card and proof of US citizenship. +Furthermore, these laws also reduce early voting periods, invalidate the right to register as a voter on election day and withdraw the right to vote of citizens with a criminal record. +Before the 2006 elections, no US State required voters to show a photo ID card. +Indiana was the first State to impose such a requirement. +In 2008, the Supreme Court of the United States upheld the constitutionality of the Indiana law. +The Republican authorities were quick to extend this practice to other States. +Over the past two years, they sponsored bills in 34 States to force voters to show a photo ID card. +It is important to note that, unlike Quebec, American citizens do not have a universal ID card such as the health insurance card. +In fact, 11% of American citizens, i.e. 21 million people of voting age, do not possess a photo ID card issued by a government agency of their State. +In addition, five million new voters in 2012 do not have such identification. +And it often costs over a hundred dollars to obtain the required identity card. +The new restrictions disproportionately affect young people, minorities and people with low incomes. +In fact, 25% of African Americans, 15% of those earning less than $35,000; 18% of citizens over 65 and 20% of voters 18 to 29 years old do not have the required photo ID card. +And that's not all. +Students, voters considered to be voting more for Democratic candidates, are not allowed in several States to use the photo ID card issued by their institution. +On the other hand, these same States allow fishing or hunting club members, who vote more Republican, to use the cards issued by these clubs when they vote. +Prior to 2004, no State required proof of citizenship to vote. +Arizona was the first to introduce such a requirement. +Since 2011, a dozen States have adopted laws requiring voters to prove they are American citizens. +These measures are clearly intended to limit the Hispanic vote. +However, it appears that two out of three Hispanic voters favour the Democratic party. +What is more, in 2011 Republican legislators sponsored laws abolishing the registration of voters on election day in eight States. +In addition, they limited the right of individuals and groups to provide assistance to voters wishing to register. +These restrictions are not without consequence. +For example, during the 2004 general election, voter registration campaigns contributed to registering around 10 million citizens. +However, the measures adopted since 2009 have led to a 17% drop in the registration rate of new voters in 2010 compared to 2006. +In addition, Republican legislators have enacted laws in five other States aimed at reducing the early voting period. +For example, during the 2008 general election in Florida, 33% of early voters were African-Americans, who accounted however for only 13% of voters in the State. +The same applied to Hispanics. +These represented only 11% of voters, but 24% of citizens who voted early. 
+On the other hand, 76% of voters were white but these represented only 46% of early voters. +Of course, Democratic legislators and their supporters vigorously opposed the adoption of laws restricting voter registration. +Several bills were blocked by vetoes of Democratic governors. +The United States Attorney General intervened to suspend the most controversial laws. +They were able to partially limit the damage. +For example, only 16 out of 34 States have adopted laws requiring the presentation of a photo ID card. +However, the new rules put in place will undoubtedly make it more difficult to exercise the right to vote in 2012. +Democratic critics denounce the partisan character of the laws that have been passed and they see a clear objective of influencing the 2012 results in key States. +A 2011 Brennan Centre report shows that the States that have adopted these laws represent 171 of the 270 votes needed in the electoral college to win the Presidency. +It is too early to say with certainty that these legislative changes in the electoral system will have significant impacts on the outcome of the 2012 presidential elections. +But one thing is certain: these new provisions will have a negative impact on the turn-out. +In this sense, the measures will partially undermine the American democratic system. +Prostate cancer screening: take the test or not? +Indeed, the PSA test sometimes shows erroneous results with false negative or even false positive results, which involve unnecessary medical interventions. +Enough to make already reluctant men hesitate to take screening tests. +Take the test or not? +We asked two specialists for their opinion. +In studies conducted in the United States, there was a lot of contamination between control groups, so it is difficult to interpret the data and make firm recommendations. +Another study, this time a European one, concluded that there was a difference in mortality between patients who were screened and those who were not. +This study also showed, with a follow-up after 12 years, that it is between 30 and 40% more likely for metastases to occur in the absence of screening. +I therefore recommend the test from age 50, or 40 if you have a direct relative who previously had prostate cancer. +African-American men are also more at risk. +The key is to make the right decision once cancer has been detected. +There are aggressive cancers and others that are indolent. +The patient really needs to be made to understand the degree of risk of his cancer, by offering him the options available, not necessarily treating prostate cancers that are not long-term life threatening, and opting instead, in such cases, for active monitoring of the disease. +Today, many men in whom cancer has been detected will not be treated because their cancer is not aggressive and is not life threatening. +Active monitoring will be suggested, and if the disease progresses, they will be offered treatment. +More and more, specific criteria are being determined in order to decide who should or should not be treated. +Therefore I recommend taking the test. +But the important thing is to have a discussion with your doctor to determine whether or not to take it. +In collaboration with the Société internationale d'urologie [SIU], Movember has created a tool that makes it possible to evaluate the pros and cons of the PSA test. 
+You can download the document (in English for the time being, a [French] translation will be available shortly) at this address: http://ca.movember.com/fr/mens-health/prostate-cancer-screening +Preventing the disease +Unfortunately, there is no miracle recipe for preventing cancer. +Despite the progress in research, the adoption of healthy living habits remains the best way to reduce the risk of suffering from it. +It is estimated that if everyone ate well and exercised enough, 30% of cancers could be prevented. +"If no more people smoked, this rate would increase to at least 50%," says André Beaulieu, spokesman for the Canadian Cancer Society. +On the other hand, it is estimated that roughly 10% of cancers are hereditary. +Some are also completely unexplained. +For the Canadian Cancer Society, the fight against tobacco remains a priority, despite the decrease in the number of smokers. +Cigarettes are linked to 85% of lung cancer cases. +It is also a risk factor for a number of others. +This massively damages people's health. +"Even today, there are 1.5 million smokers in Quebec" deplores spokesperson André Beaulieu. +Encouraging data: 10 years after giving up smoking, the risk of dying from cancer drops by half. +Weight +Overweight and obesity are also conducive to the onset of the disease, according to the SCC. +They can increase the risks of cancer of the breast, colon and rectum, oesophagus, pancreas and uterus. +"Research shows that the regular practice of physical activity throughout your life protects against colon cancer" it is also said. +Diet +The organisation also recommends limiting your consumption of red meat. +In large amounts, it increases the risks of developing colo-rectal cancer. +Likewise, so do cured meat products, and these should be avoided. +The conservation of meat by smoking, drying or curing can cause the formation of carcinogens. +"They can damage cells in the body and lead to the development of cancer" it is explained. +Vitamins +In recent years, a number of scientists have studied the links between vitamin supplements and cancer. +For the time being however their research is inconclusive. +Studies on vitamin E are contradictory, according to the SCC. +While one study noted a decrease in the risk of prostate cancer, another noted an increase. +Also the effect of vitamin D on cancer is not clear. +In addition, Mr Beaulieu emphasises the importance of discussing your concerns and family history with your doctor. +"Taking a screening test doesn't give you cancer." +The Higgs boson revealed +The announcement of the probable discovery of the Higgs boson created quite a stir last summer, and with good reason. +Indeed, it is believed that this boson is part of the mechanism responsible for the mass of everything in the Universe, no less. +Also it is the last particle whose existence is predicted by the Standard Model - our best or "less worse" explanation of the nature and behaviour of matter - but which has not yet been observed empirically. +But for physicists, it is still not completely sure that it really is the Higgs. +We know without a shadow of a doubt that it is a new authentic particle, and greatly resembles the Higgs boson predicted by the Standard Model. +In addition, new data unveiled this week at a large physics Congress in Kyoto seem to confirm this, but there are still insufficient data to be perfectly sure. +But let's suppose that it really is the Higgs, since the chances of being mistaken seem slim, and see what it is. 
+In our world, there is a fatally unavoidable law which states that two things cannot meet at the same place at the same time. +There's no way to break this rule - and don't try too hard, you'll go mad. +However, even though particle physics is a very strange world, it turns out that it also has a law of the same kind: the Pauli exclusion principle, which states that two particles cannot occupy the same space at the same time if they are in the same "quantum state" - this "state" consisting roughly of certain of their characteristics. +Based on this, physicists classify particles into two categories. +In one corner we have good citizens called fermions, who wisely obey the Pauli principle. +While lurking in the other are the bosons, a nasty band of anarchists who respect nothing - at all events, not this principle, which means that they can indeed be found in the same place at the same time. +These bosons are then divided into two groups, according to the Berkeley Labs Particle Adventure site (absolutely extraordinary, by the way): mesons, which we will not discuss here, and "force particles" by which the great forces of nature are propagated and to which the Higgs boson may be somehow related. +These bosons, it must be stressed here, are not all such exotic bugs as you might think. +In fact, if you can read this article, it is thanks to an extraordinarily banal boson: the photon, or the "light particle" which is the "messenger" of the electromagnetic force. +When, in fact, a particle having an electric charge accelerates or changes direction, this "disturbs" the electromagnetic field in this specific place, rather like a pebble thrown in a pond. +This "disturbance" produces an electromagnetic wave (of light, infrared, ultraviolet etc.), and this wave is nothing other than a photon - and thus one of the "force carrier" bosons. +More stable field +The same applies to the Higgs boson, with the difference that it is another field, the Higgs field, which must be "disturbed" for the boson to appear. +Now, this Higgs field is much, much more stable than the electromagnetic field; to excite it, it is necessary to achieve very, very high energy levels, rather like a frozen pond which would need a very large rock to wrinkle the surface. +Which is why a huge particle accelerator like the one at CERN - the Large Hadron Collider is a ring with a 27km circumference! - is needed to achieve such energy levels. +The analogy with the electromagnetic field is again useful for explaining the relationship between the Higgs and mass. +In fact not all particles, or all materials, interact with the electromagnetic field. +Some, such as magnets, do so, but others don't - a piece of paper, for example, will never stick to a fridge. +And likewise, not all particles interact with the Higgs field: those that do so have mass, while the others (such as the photon) do not. +Now, what is it that all this research "can bring"? asks Ms Plamondon. +For science, it serves to check the validity of the Standard Model (SM), and also allows physicians to examine any discrepancies between the observations and predictions of the SM. +A number of people, moreover, fervently hope that some will be found, because the slightest difference could open a door to a "new physics" and plug certain holes in the Model. +This, it must be said, still has huge shortcomings, offering no explanation for gravity (oops!) or dark matter, which forms approximately 80% of the matter in the Universe (re-oops!). 
+But to date no such discrepancies have been found at CERN. +Repercussions +The repercussions of this research on the daily life of the man in the street are more difficult to predict, but it would be wrong to assume that there won't be any. +Remember: in the very early 60s, the pioneers of the laser at Bell Laboratories did not suspect the revolution that would be triggered by their work. +They had an inkling of the scientific applications, but nothing as to the rest. +In fact, the late Willard Boyle - a physicist who worked at Bell Labs, where the laser was invented in 1960, and who himself developed the first continuous laser (the first were pulsed) in 1962 - told us that initially the laser was rather seen as a "lab gadget." +Just imagine... +And then, applications can also come from all the instrumentation that surrounds research. +For example, the same Willard Boyle developed a small light sensor in 1969, during his work in optics. +This sensor, although this was not at all the original intention, now serves as an "eye" to all digital cameras worldwide, and earned him the Nobel physics prize in 2009. +This does not of course mean that the activities of the LHC will necessarily transform our lives, but it does mean that, actually, you never know... +Palliative care - The best way to die... | Le Devoir +With its Dying with Dignity Commission, Quebec recently discussed the delicate issue of the end of life. +The debate is due to resume shortly as a bill is being prepared. +However, in this vital area, much remains to be done. +Le Devoir attempted to look more closely. +Just a few weeks ago Mr L. lived alone in his Montérégie apartment. +The festering prostate cancer had allowed him a two-year respite. +"They gave me five years to live, I've made it to seven," he says, with mixed emotions, lying in his bed at the Victor-Gadbois palliative care home in Beloeil, where he arrived the previous day. +"But it's still a shock, you can never be prepared for it" he adds. +The disease is doing its work: huge weakness which prevents him going to the toilet alone, and even eating alone. +Sitting in front of an appetising lunch, he consents to being helped to eat, resigned. +Courageous, he even manages to smile, talks to the strangers bustling around him, bringing him his medication, offering him a bath. +The courage of ordinary death. +"What I want most is to be cured of my diarrhoea, it's humiliating" he confided. +A few hours later, the team found a cure for this illness. +"During our lives, we learn that a man pisses standing up," says Pierre Brodeur, psychologist at the Victor-Gadbois home. +Regressing to the stage of a child, for some people, is an unacceptable humiliation. +"It depends on the person's ability" to accept the regression, he says. +Because, in the opinion of a number of people working in palliative care, great moments occur at the very heart of such regression. +Patients at the Victor-Gadbois palliative care home all suffer from cancer. +They have a maximum life expectancy of three months. +At this stage, the team of doctors and nurses surrounding them no longer provides so-called "curative" care. +For Mrs A., 89 years old, the worst fear is to die "conscious and suffocating." +But the disease has made me discover my children. +"I have fine children" she adds. +"I don't wish for anything more in life" she says, before accepting having a mask put on to help her breathe. +She looks forward nevertheless, in the next few days, to a last visit by her son coming from Italy. 
+At Victor-Gadbois, a group of volunteers provides bodily care and help with feeding. +This is palliative care, given when there is nothing else that can be done. +To make death more comfortable. +In Quebec, there are palliative care beds for 11,700 inhabitants. +This is very few when we know that we will all die one day. +Here, life continues under the best possible conditions, explains Dr Christiane Martel, one of the doctors at the home. +Whether at a physical comfort, emotional or spiritual level. +A person who is dying will accept being helped to drink brandy or Pepsi, whatever is their tipple. +Diabetics no longer need to control their blood sugar. +And death is part of everyday life. +Yesterday evening, a beer was served to Mr X, who died during the night. +This morning, it is his son who will finish the beer at the feet of the deceased. +"We help relatives as much as patients" says Nathalie Savard, Director of Care. +At the Victor-Gadbois home, one day follows another but no two are alike. +Along with a 93-year-old man who is savouring his last meeting with his family, sitting firmly wedged in his pillows while toasts are drunk in his honour, a 36-year-young man is dying tragically, surrounded by his parents, his wife and his two young children, after having tried everything to survive. +"For six months, there have always been three to five beds which are occupied by cancer patients less than 45 years old" says a concerned Dr Christiane Martel. +53% of patients admitted to the Victor-Gadbois home come from their homes, 47% from hospital. +Lack of access to palliative care +It is said that 77% of Canadians simply have no access to palliative care, which is care designed to ease the pain when a patient has reached the terminal stage of life, be it at home, in hospital or in a care home. +And a number of organisations, such as the Victor-Gadbois home and the Palliative Care Society in Greater Montreal, specialise more or less exclusively in care provided to cancer patients. +It is precisely this large gap in Quebec health care which has made a number of palliative care physicians fear the adoption of a law on euthanasia and assisted suicide. +Since October, a manifesto, signed by palliative care luminaries including Dr Balfour Mount and Dr Bernard Lapointe, has been circulating to demonstrate their opposition to such an initiative. +According to Dr Christiane Martel, the Quebec health system is not effective enough to ensure that everyone will be entitled to quality palliative care before it is accepted to proceed to euthanasia. +Recently, she says, I saw a patient spend 14 days in emergency, in great pain, without anything being done to ease her suffering. +I'm afraid that patients ask to die because they don't receive adequate care. +And at the same time, some oncologists work relentlessly on their patients until the last day, despite the worst prognoses. +Hélène Richard's survival hopes were already minimal when she ended her gruelling chemotherapy. +When I announced to my oncologist that I was stopping the treatment, she told me she regretted that I had given up fighting, she said. +However, she had told me I was finished! +No all-powerful care +Dr Martel believes that 90% of patients asking to die thank care-givers for not having acceded to their request after they have been relieved of their pain by a palliative care team. +But it must be said that palliative care is not absolutely all-powerful in the treatment of pain. 
+According to Elsie Monereau, Palliative Care Director with the Palliative Care Society in Greater Montreal, patients are resistant to treatment against pain in 8% of cases. +At the very end of life, physicians then often resort to palliative sedation, which is equivalent to putting the patient to sleep until the time of death, either sporadically or permanently. +We can no longer pretend not to understand this part of their suffering. +Increasingly, an unrelieved patient will have the option of having such palliative sedation. +Patients who are not relieved always say the same thing: "I want to die." +But this does not necessarily mean "I want you to euthanise me," it means "I want to be relieved." +This report was made possible thanks to a journalism award from the Canada health research institutes. +Widespread real estate scandals in Quebec +Day after day highway officials, building contractors, political party fund-raisers and Italian mafia specialists tell what they know of a formidable "system," combining the building industry, government officials, politicians, trade unionists and organised crime. +An "industry" which has cost Quebec taxpayers dearly, especially in the 1990s and 2000s. +"It is curious how the system is crumbling since we took drastic measures" says Jacques Duchesneau ironically, a Quebec politician and former Montreal Chief of Police. +It was through him that the scandal broke in 2011, in an in-depth investigation into corruption related to road construction contracts in Quebec, to which the liberal Prime Minister at the time, Jean Charest, had consented only reluctantly. +The "Duchesneau report" established a direct link between industry, under-the-table financing of parties and bribery of officials. +"Since the inquiry opened in 2010, he says, the Ministry of Transport alone reportedly saved a billion dollars on contracts," with certain people curbing their instincts to get a share! +The Charbonneau Commission "has already brought down two mayors" he adds, hoping that it will succeed in "revealing the schemes behind the individuals." +A permanent anti-corruption unit, created in 2011 +The Permanent Anti-Corruption Unit, created in 2011, is also coupled with its army of government analysts, investigators, and auditors. +Plus the "Marteau squad" policemen who, since 2009, have apparently led the Montreal "sewer cartel" to soft pedal on the inflation of contracts... +In recent weeks, it has conducted a series of searches and brought charges of fraud and corruption against municipal politicians, such as Frank Zampino and Richard Marcotte, Mayor of a suburban town. +Next on the list is apparently Gilles Vaillancourt, who has just resigned from his post as Mayor of Laval, third largest city in Quebec. +He is suspected of pocketing repeated bribes in exchange for public contracts. +Others formally accused are Montreal highway engineers and Italian entrepreneurs, including Tony Accurso and Lino Zambito. +The latter caused a sensation by explaining the mechanics of the public contracts "system" to the commission. +He himself paid 3% of the value of the contracts obtained in Montreal to an intermediary linked to the mafia who in turn paid the money to Union Montréal, Mayor Gérald Tremblay's party. +Mr Zambito has handed money out freely in the 2000s, giving over 88,000 Canadian dollars (roughly 68,000 euros) to provincial parties, especially the Liberals then in power. 
+He also admitted having organised an illegal fundraiser for former Liberal Deputy-Prime Minister, Nathalie Normandeau. +Sewer contracts with inflated costs +In Montreal, the corruption "system" ran smoothly. +Gilles Surprenant, former public works engineer, described it in detail in front of the commission: in ten years, he received from construction companies gifts, invitations to trips, golf tournaments, restaurants, hockey matches and bribes totalling 736,000 dollars, in exchange for sewer contracts of which he inflated the costs. +Other highway officials admitted having their palms greased by inflating invoices by 30 to 40%, and by false add-ons. +Then an organiser of the Mayor's party, Martin Dumont, accused Mr Tremblay of having deliberately closed his eyes to a parallel budget feeding his coffers with dirty money. +Following these revelations, Mr Tremblay resigned in early November, plunging Montreal into a major crisis. +Chantal Rouleau was one of the first women in Montreal to raise the alarm. +Mayor of the borough of Rivière-des-Prairies, to the East of the island, she protested in 2010 against the sale of municipal land bought for 5 million dollars and resold for... 1.6 million to developers, at the height of the real estate boom. +70% dirty money in election campaigns +On the investigation which will eventually be implemented, she says she "is following a thread in order to find out how the system - infiltrated by ants - works, to put a stop to the gangrene and catch the culprits." +The process, she says, is "painful but positive." +The wound is being cleaned, but Montreal would need its own investigative unit with ongoing monitoring, to avoid the return of these questionable practices. +How to clean house. +Properly. +Jacques Duchesneau notes for his part that "officials stole hundreds of millions of dollars," but he is especially concerned about the role of "elected people aware of the scheme," when they were not up to their necks in the scam! +Estimating the share of dirty money in the financing of election campaigns in Quebec at 70%, he says dryly: "I was told that it was only a pale reflection of reality." +The Quebec government proposes to limit donations to parties to 100 dollars, but this will not change the situation, he says: "Until election expenses are strictly limited, there will be dirty money in politics." +He advocates a complete overhaul of the system for granting public contracts and party funding: "We can't go any lower; getting to the bottom of things, with courage, will help to rebuild the house on more solid foundations, with more controls and laws." +Although this story tarnishes the international image of Quebec and Montreal, Mr Duchesneau invites anyone laughing to look in their own backyard... +"PSG is not FC Barcelona!" +This season, you have taken on a new stature with PSG. +How do you explain this progression? +It can be explained by individual awareness but also by the new dimension of PSG. +Some great players have arrived. +Every day I'm making progress alongside them. +The technical staff has also brought me a lot. +Day by day, all these things help me raise my level of play. +And, in a match, it's easier. +Everything moves very fast in football. +But I don't get worked up. +From my debut at the Clairefontaine INF pre-training centre to my transfer to Saint-Etienne, I've always moved step by step. +So you benefit from the competition brought in by Carlo Ancelotti... +This summer's recruits are used to playing matches at a high level. 
+They also know that every training session is crucial. +Which is what makes a player like me want to face up and give my best. +On the other hand, Carlo Ancelotti gives me a lot as regards my position. +He's supported by deputies like Claude Makelele, who played in the same position as me. +Is Ancelotti the man for the job? +Definitely. +Ancelotti inspires respect among all the experts. +Today he has no equal in Ligue 1, and he's one of the best coaches in Europe. +He has masses of experience and has won many titles with top clubs. +He's worked with great players. +I think he will bring more titles to Paris. +In January, I had an encouraging discussion with him. +I was just coming back from a series of injuries. +The confidence he gives me also explains my performance. +What importance do you attach to the first part of the season for PSG? +In Ligue 1, Lyon overtook us at the top. +But we're waiting on the sidelines. +One of our main goals is the Champions League: we qualified for the last 16 in the right way. +What is the club's goal in this competition? +We'll try to go as far as possible. +From now on, anything can happen. +But we'll have something to say against some very good European teams. +First of all, we want to finish top in our pool, ahead of Porto, to have home advantage in the last 16 match. +Can PSG become a top European club in the short term? +It already has the budget... +To become a top European club, Paris needs to win titles and keep it up over time. +Today, this isn't the case. +Financially, PSG has the means to make it happen. +In Ligue 1, would not winning the title, like last season, be a big failure? +Definitely, it would be a major disappointment. +This year, we're really committed to winning the championship. +We weren't far away last season. +In May, there was great disappointment because we were good enough to finish first. +It was a terrific season. +We finished with 79 points. +Normally, 79 points is good enough to be top... +But another team, Montpellier, had an even more fantastic season. +I think this is the year. +Even if big teams like Marseille, Lyon and Bordeaux are competing for the title, I think we have the weapons to win. +Do you think the media expect too much of PSG? +It's normal for them to expect a lot from us given what's been invested and the players we have. +We totally accept it. +After we won 4-0 at home against Troyes and they still found things to blame us for, that's definitely a bit frustrating. +You wonder what more people expect. +You're never going to win 4-0 every weekend. +We're not FC Barcelona! +We're trying to implement a game project. +It takes time to build a team. +The Champions League proved we could hold our own. +Look at Manchester City who, for two seasons, have failed to qualify for the last 16, despite also having spent huge amounts! +Based on the amounts invested, you should be 15 points ahead at the winter break! +That would be to ignore our opponents and the French Championship. +Lyon and Marseille, who were no good last season, were "boosted" by the new PSG. +This shows that Ligue 1 is exciting. +I hope that in May we will be able to smile in saying that, despite all the difficulties, we finally did it. +PSG seem totally dependent on the exploits of Zlatan Ibrahimovic. +So much so that people say there is a "Zlatan dependence." +This means Ibrahimovic is very successful and scores a lot of goals. +That's why he came, and he's proving he's the star of Ligue 1. 
+He's demonstrated everywhere he went that he was a great player, a world star. +Within the group, we respect the man and the player. +And also he respects the men he has around him. +What he has done is truly exceptional. +It pushes others to raise their level of play. +Thiago Silva, who is one of the best defenders in the world, also helps everyone else progress. +How did you get on in Euro 2012 with the France team? +A disappointment. +I really wanted to play in this Euro. +Unfortunately, my injury prevented me from getting any game time. +I saw some things there and came out stronger. +Today, I'm playing well in selection matches. +Which is what I've been hoping for since my baptism with the Blues. +I've learned the lessons from what happened in the Ukraine and I now owe it to myself to have exemplary behaviour. +What do think about Didier Deschamps's first few months in charge of the Blues? +He has the results he wanted. +We're well placed in the World qualifying group. +The coach is tough, close to the players, and inspires them to win. +Like Laurent Blanc was. +But I don't want to make any comparisons. +Blanc had achieved his goal when we qualified for the Euro. +I hope Didier Deschamps will take the Blues to Brazil. +Did the good draw (1-1) snatched in Spain, on 16 October, represent a founding match? +That match gave us confidence. +Everybody fought for everybody. +Before that shock in Spain, I'd never experienced such a match in my career. +With Bitcoin, pay and sell without banks +The opposite of current monetary exchanges, based on central banks, identified transactions and processing fees among the parties involved. +In addition, as often in these technologies, a political vision is palpable: the belief that the current monetary system, made up of banking monopolies, leads to financial crises. +In fact, Bitcoin, invented by Satoshi Nakamoto (a pseudonym), is both a virtual currency (but convertible into dollars, euros) and a secure exchange protocol like BitTorrent, which allows peer-to-peer file exchange. +Around 200,000 transactions have already been recorded via 15,000 computers on the network. +Close to a thousand web sites accept bitcoins as donations or means of payment. +The bitcoin exchange rate, after reaching a peak of 30 dollars (23 euros) in June 2011, fell to 2 dollars five months later, returning today to around a dozen dollars (rates are listed on the bitcoincharts.com site). +Nothing very impressive, compared to global transactions in real currency or financial products. +However, the European Central Bank (ECB) took an interest in it in a report on virtual currencies published in October. +It describes bitcoin as "the most successful virtual currency," "in competition with the dollar or the euro" and "similar to conventional currencies." +Bitcoin differs from other types of virtual currency such as 'credits', used to progress in a video game which you win by playing or which you can buy (and sometimes exchange in return). +The social network Facebook has also developed this kind of system. +But, on each occasion, a central authority controls and handles the exchanges. +With Bitcoin, all nodes in the network are both custodians of the book of accounts, auditors, currency issuers, and buyers and sellers. +How does the network operate? +Each transaction between two users is actually carried out between two electronic addresses like with an e-mail. +Except that a user can choose a different address for each payment, thereby ensuring anonymity. 
+A set of information associated with this transaction is signed electronically by a dual-key encryption system. +So the network can verify the authenticity of the transaction. +Using the contents of the file, it is also possible to ensure that the exchanged bitcoins exist in the public book of accounts, broadcast across the entire network. +The key step is entering the new transaction in the book. +It passes through the resolution of a mathematical challenge issued to the computers, and the winner, a kind of interim central banker, will have the privilege of adding this extra line. +This is a file hashing phase, i.e. the transformation of a large file into a shorter and unique digital imprint. +Computers "take" the new transaction and add a number to it, then "hash" it all up. +The goal being to find the number that gives a special imprint (lots of zeros at the beginning). +Once this number has been found, the other nodes can easily check that it is the right one. +The transaction is then indestructibly linked to the chain of all the other transactions; any modification would alter the imprint. +If a user wanted to defraud by paying twice with the same money very quickly (less than ten minutes), only one of the two transactions would be validated by the network - the other would remain an orphan because the two have different imprints. +The computer that resolves the challenge wins 50 bitcoins. +To avoid inflation, this award is regularly divided by two, probably by the end of 2012. +The number of bitcoins in circulation is therefore limited to 21 million, but they are divisible down to the hundred millionth, which leaves some margin... +The difficulty of the challenge is also raised with each increase in computing power. +The life of the network has had its ups and downs. +Websites providing services for Bitcoin have been attacked and bitcoins in deposits stolen. +"The loophole used is not the protocol itself" says Pierre Noizat reassuringly, who has just launched Paymium, a real currency payment company that uses the Bitcoin network. +The ECB also highlights the possibilities of money laundering using this anonymous service. +But cash also has this weakness. +Major players like Wikipedia refuse donations of this nature. +Others, such as the WordPress blog platform, accept them. +Recently, Adi Shamir and Dorit Ron, from the Weizmann Institute in Israel, analysed the accounting books and showed that almost 80% of bitcoins do not circulate. +In November, "huge sell-offs" were launched. +"Thirty thousand dollars were exchanged" welcomes Jon Holmquist, who works for Coinabul, which converts bitcoins to gold. +Pierre Noizat, also author of an educational book on this currency, has a lot of faith in the potential of this technology as a transaction network. +His system, Paytunia, is equivalent to a credit card (in real money) or a contactless payment by mobile, but it uses Bitcoin to validate transactions, which are thus cheaper. +Also the user manages his identity and can therefore be anonymous. +The system is easy to implement by merchants, who do not need to install new terminals or software. +They just need to provide an address that a phone can "photograph and recognise" says Pierre Noizat, who confirms he has thousands of users. +There is a general movement to reappraise hierarchical systems for more horizontal systems. +"It will take time for Bitcoin to become firmly established, but 2013 could be a turning point," he predicts. 
+The ECB, in its report, says it will reassess the various risks, currently regarded as high, in the event of the currency's success. +We got out of Afghanistan. +What now? +French troops have left their area of responsibility in Afghanistan (Kapisa and Surobi). +NATO and the Americans are due to follow in late 2014. +It is time for the Afghan army to resume possession of its territory and the Afghan people to choose their future, without expecting us to do everything. +It is mainly Afghan peasants that we have punished by regarding them as terrorists. +And ourselves, with our 88 soldiers killed, plus the wounded, the maimed. +The Taliban is composed of foreign extremists, former leaders in refuge in Pakistan, but often peasants who refuse the presence of foreign armed forces, like in the time of the Soviets. +They want to defend their traditions, both ancient and archaic, even though they have been joined by Jihadists, Pakistanis, Arabs, Uzbeks, Tajiks. +Tolerated, sometimes assisted, by local insurgents, the latter will no longer be so when Westerners become more scarce. +The departure of French troops from the Nijrab base, which I observed from the top of hills of almond trees planted with French funding, was carried out in an orderly fashion. +Convoys of trucks and armoured vehicles reached Kabul without being attacked, overflown by helicopters. +There will be no wave of the Taliban in Kabul by the end of 2014. +Circumstances have changed since their irresistible advance between 1994 and 1996. +At that time Kabul was empty, the country being torn apart by the struggles between different factions. +Their takeover of the country had been perceived then as a sort of liberation, a return to safety. +Afghanis paid the price of the obscurantism of these peasants by the organisation of Al-Qaeda, but their situation has not improved today. +Former Mujahidin, the Afghan Government and the current Taliban are allied in the desire to keep women in an inferior position. +The main anti-Soviet war leaders returned to power in 2001. +They became profiteers, seizing government land to resell as building land to refugees returning from Iran and Pakistan, benefiting from huge American outsourcing contracts. +They have become discredited; what is more, most of them did not fight themselves. +The people, as I heard in the countryside, want a Government that is not made up of thieves. +Many young people want to leave, as those who were able to benefit from American largesse will leave: the flight of capital is considerable. +The young people are tired of war and its ideologies. +They have rubbed shoulders with the modern world during their exile in Iran or Pakistan, and appreciated the benefits. +Roughly 65% of the population is less than 25; Kabul now has 5 million people, a fifth of the total population. +In towns and cities, the state schools are full, with girls and boys alike. +It will be necessary to provide work for those young people who no longer want to return to the obscurantism of the former parties or the corruption of certain leaders. +All of them, including the armed opponents, are partial to mobile phones; television, with its Turkish soap operas that show a modern world, is followed everywhere. +The army is now present. +Will the authorities who command it be considered legitimate? +Former commanders of the anti-Soviet struggle are already thinking about restoring provincial militias, which will escape the central power. 
+Afghanistan, land of mountains, with strong local identities, should be able to benefit from a certain decentralisation, in the image of the Western nations, but the United States wanted to turn it into a centralised State, with strong presidential power, abolishing the post of Prime Minister, which had existed since the 1964 Constitution. +President Karzai does not want any foreign controls, particularly on the occasion of the elections in April 2014. +But, since the 50s and already well before, his country has been dependent on foreign aid. +No industries have been re-established, no dams are in good condition, no major irrigation systems have been repaired. +Everything is imported; nothing is produced, apart from fruit and vegetables. +The Priority is left to private initiative. +In a country ruined by thirty years of war, government control over the infrastructure would have been necessary. +The rumour was spread that Afghanistan had huge mineral wealth. +This only added to the feeling that the Westerners were only there to seize it. +With no energy to process the iron ore or copper on site, or means of transport to export it across the mountains, there is no mining. +The Chinese have already almost left the Mes Aynak copper mine, leaving international archaeologists (funded by the World Bank) to search the huge Buddhist site and remain the largest employers in the province. +One day it will also be necessary for Afghanistan and Pakistan, on which imports and exports largely depend, to restore normal relations. +The departure of French combat troops was completed on 20 November. +The new cooperation treaty provides for the continuation of traditional aid: girls' high school, boys' high school, French Department at the University, French Institute, cooperation in the military, legal, medical and agricultural fields, support to the archaeological Delegation. +Since 2009, to try to "win hearts and minds" and achieve the impossible task of reconciling aid and offensive actions, a "civil-military actions" service from the Ministry of defence (Cimic), closed in 2012, has carried out, and continues to carry out successfully, through a small French NGO, many community and agricultural rehabilitation projects in dozens of mountain villages. +These projects, involving large numbers of local labour, have helped to contain the insurgency: irrigation, wells, drinking water, reforestation, fruit trees, soil protection and increase in cultivable areas. +What will we leave as a souvenir, after two billion euros of military spending? +A much more modest budget would contribute to improving local living conditions, which are very hard in these valleys often located over 2,000 metres above sea level. +The Embassy has received dozens of written requests for small agricultural projects from local communities in Kapisa province. +To be in a position to free themselves from the uprising led by foreign groups, which is what farmers told me they want, a small amount of civil aid should be maintained in their favour, well controlled and directly affecting them. +A Constitution by force in Egypt +A new gamble for President Mohammed Morsi. +While Egypt remains more divided than ever around the constitutional declaration, which temporarily grants him full powers, he has decided to go for broke. +Taking everyone by surprise, he announced on Wednesday that the Constituent Assembly would vote on its final text the following day. 
+Just a week ago, the head of State had given the Assembly two more months to finish its work. +For two years Egypt has relied on a provisional text, amended several times and this has weakened institutional stability and led to legal imbroglios. +This new initiative has only served to enhance the divide in the country. +According to his opponents, the President is persevering in his "autocratic delirium," continuing to "go back on his word" and 'trample the law." +His supporters affirm that this is the quickest way to put an end to the institutional and political crisis, by speeding up the transition process. +A referendum is due to be held within the next two weeks. +A very short period, which forces the Brothers to abandon their plan to explain the text, article by article, to the Egyptians. +For the President, it is also a way to achieve popular and democratic legitimacy while the dispute rages throughout the country. +Mohammed Morsi seems convinced that Egyptians will vote favourably, as he stated in an interview with the American weekly Time. +Particularly since a hasty vote smacks of an ultimatum to the Egyptian people: "Either you vote for my text, or I keep full powers," these powers supposedly expiring following adoption of the Constitution. +It was in a strange atmosphere that 85 members of the Constituent Assembly, with a large Islamist majority, voted on the text yesterday. +Most of the liberals were missing. +In mid-November, shortly before the constitutional declaration, they had slammed the door, feeling they had failed to assert their views. +Representatives of human rights, religious minorities or civil society had done likewise. +In order to obtain a quorum, 11 members, alternates, were hastily added yesterday morning. +Some of them are very close to the Muslim Brotherhood. +Not surprisingly, the articles were for the most part voted unanimously. +Commentators were also amused that one of the only diversions of the day was expressed with regard to... the hour of prayer, some Committee members feeling that the Constituent Assembly clock was wrong. +The text, which was still being voted on yesterday evening, has 234 articles. +The main focus of attention, article 2, remains in the final analysis identical to that of the 1971 Constitution, stipulating that "the principles of sharia are the main source of law." +The Salafist parties, for which the establishment of Islamic law is a major claim, were hoping to replace "the principles" by "the rules," which would have allowed stricter application. +For the Islamists, the fact that this article was not amended is a guarantee of their goodwill and their respect for the other elements of Egyptian society. +"Hypocrisy" respond the liberals, who see only a communication coup. +Because in their opinion Islamisation of the Constitution is done through other articles. +They refer in particular to article 220, which grants Al-Azhar University an advisory role, with particular reference to verifying the conformity of the laws with sharia. +According to Egypt specialist Sophie Pommier, this is worrying because "the people called upon to advise are not elected and have no democratic legitimacy. +This suggests the beginnings of a theocracy." +The liberals' fears are also fuelled by the fact that the next Rector of the university will probably be much less moderate than the current one. +"For the time being, there is no concrete religious implication. +With this Constitution, things remain under civil rule. 
+Most of the lawyers who worked on this text are not Islamic law scholars but academics, some trained in the French system," qualifies Alexis Blouet, who is writing a thesis on the Egyptian constitutional transition.
+But he acknowledges that "there may be some ambiguity regarding article 220, because the terms used borrow from the religious vocabulary.
+Reference is made in particular to "fiqh" [Islamic jurisprudence, Editor's note].
+And the question could be asked in future as to what extent civil judges are competent to pronounce on it."
+Beyond its religious aspect, the text voted on yesterday is highly criticised for the extensive powers it grants to the President of the Republic.
+The Muslim Brothers argue that these are significantly reduced compared to what they were under the former regime.
+Another issue: the powers conferred on the army.
+In accordance with the wishes of the military, the Defence budget review will not be submitted to Parliament, but to a National Defence Council.
+Nor will trials of civilians in military tribunals be banned, as requested by associations for the defence of human rights.
+These associations also voice their concerns about the text, which they consider repressive.
+The offence of blasphemy is maintained and insults are now prohibited, which could have serious consequences for freedom of expression, particularly for the press.
+In addition, none of the articles any longer refers to the protection of women, highlights Heba Morayef of Human Rights Watch.
+In her opinion, the only positive point is the prohibition of torture in article 36.
+The word was not included in the previous Constitution.
+While the Egyptian President was speaking on television yesterday evening, demonstrations were being planned for this afternoon.
+Supporters of the Head of State will march on Saturday.
+In Israel, holy places, the omphalos and a sea of saline water await Ukrainian tourists
+The Holy Land combines the splendour of biblical truths, modern comfort and primeval nature.
+AiF [Argumenti i Fakti] newspaper highlighted the five most important reasons why it is a must to visit Israel.
+Let's worship the holy places
+It is worth visiting the River Jordan, where Jesus was baptized.
+It is considered that all who enter this baptism "bath" are blessed by God.
+Galilee is the place where Jesus performed his miracles: turned water into wine at a wedding, walked on water, calmed a storm, and filled the nets.
+This is also where Jesus appeared before his disciples after the resurrection.
+But the biggest number of holy places is in Jerusalem.
+Believers walk along the Way of Grief, or Via Dolorosa.
+It starts at the Antonia Fortress - the Praetorium - where the judgement took place, and leads along the streets of the Old Town to the Church of the Holy Sepulchre on Golgotha - the place of the crucifixion, the Stone of Unction and the place of Jesus' burial.
+This is also the location of the symbolic Christian omphalos, which represents the salvation of mankind.
+The Holy Cross Monastery in Jerusalem is erected at the site that, according to Christian legend, yielded the tree used to make the cross for Jesus' crucifixion.
+Jerusalem has the most holy places for the Jews as well - the Wailing Wall, which remains from a temple destroyed by the Romans in 70 AD.
+According to tradition, people of different faiths leave notes here with their wishes, which are then fulfilled.
+Travel along a vertical
+The ruins of the Massada Fortress remain from a secret refuge from enemies, built by Herod in 25 BC for his family.
+They are located on cliffs in the mountains at an elevation of 450 m above sea level.
+They can be reached on foot only by those who are into mountain climbing.
+Others are carried to this historic mountaintop by cable car.
+In the north of the country, at an elevation of 1600-2040 m, there is a famous ski resort called Hermon, which fills up with tourists in the winter months.
+A shuttle bus brings people to it from the foot of the mountain.
+The total length of the ski pistes is 45 km.
+According to an ancient legend, pagan gods used to live on the mountain.
+Visit unique museums
+This country has about 300 museums.
+You won't be able to visit all of them on one trip.
+But at least the five most interesting ones are worth a visit.
+Among them is the Museum of Israel, located close to the Knesset (Parliament).
+It holds the ancient Qumran manuscripts and Dead Sea scrolls found in the caves of the Judean desert, along with about 500,000 archaeological and anthropological artefacts.
+The Museum of Art in Tel-Aviv is also interesting.
+Its exhibits include a wide range of impressionists and expressionists such as Monet, Pissarro, Renoir, Sisley, Cezanne, Matisse, Modigliani, Chagall and Picasso.
+In Akko, you can visit the bath museum Al-Basha, which consists of several rooms of ancient Turkish baths with models of visitors and bath attendants of the time.
+In Caesarea, it is worth visiting the unique private Ralli Museum, where you can enjoy the sculptures of Dali and Rodin.
+There are no tour guides or gift shops.
+Entry is free of charge, and contributions are strictly not allowed.
+The fifth one is the Holocaust Museum, or Yad Vashem, in Tel-Aviv, which tells one of the most dramatic stories in history.
+The most tragic section is the children's memorial, built in memory of 1.5 million children killed in concentration camps and gas chambers.
+You go in and find yourself in complete darkness.
+Stars are glimmering,
+and you listen to the names of Jewish children and the countries where they died.
+Ukraine is mentioned there too.
+Wellness
+There are three resort areas in Israel, located on the coasts of the Mediterranean, Red, and Dead Seas.
+Each has swimming pools, aqua parks, dolphinaria and oceanaria.
+It is notable that one can swim in the Red Sea even in the winter months, because the water temperature does not drop below 21 degrees and the air warms to 23 degrees.
+The Dead Sea is even warmer, and people swim in it all year round.
+Incidentally, it is the most unusual sea in the world, located at the lowest point on the planet - 417 m below sea level.
+Its azure water is saline and easily keeps you afloat, even if you don't know how to swim.
+The surrounding landscapes are surreal in their beauty.
+People come here to undergo a course of treatment using salt water - wraps and medicinal muds - and to improve their health if they have dermatitis, allergies, asthma, eczema, arthritis, bronchitis or diabetes, or to restore their emotional balance.
+Touch the mysteries of antiquity
+They are preserved in the old section of Tel-Aviv - in the town of Jaffa on the Mediterranean Sea.
+The famous sea route connecting Egypt, Syria, Anatolia, and Mesopotamia runs through it.
+The city is mentioned in ancient Greek and ancient Egyptian legends.
+According to legends, this is where Noah built his ark and Perseus saved the beauty Andromeda, with whom he lived a long and happy life.
+Tourists really like to wander the narrow streets named after signs of the zodiac. +They say, if you touch the walls on the street of your sign, fortune will come to you. +In Jaffa, you can meet newlyweds who come from all over Israel and even from other countries for photo sessions. +And in Caesarea - the city of King Herod - you can walk around a Roman theatre, "capture" the Crusader fortress. +During the Roman period, Caesarea was the main city of Judea and the residence of Roman prefects, including Pontius Pilate. +The carefully restored theatre is now used for evening concerts and opera performances. +A note for the tourist +When you go to Israel, don't worry about your bad English knowledge: approximately 30% of the country's population speaks Russian. +For the trip, it is better to take dollars, not euros, because they are easily exchanged for shekels (currently 1 dollar = 3.8 shekels). +City transportation is mainly buses, but Jerusalem has a high-speed tram, and Haifa has the only subway line in the country, comprising six stops and connecting upper town with lower. +In essence, it is an underground cable railway. +A ticket for any type of city transportation costs 6 shekels, and you can ride for 1.5 hours with transfers. +According to the Jewish tradition, Sabbath is celebrated in Israel. +Between Friday evening and the sunset on Saturday, markets, stores, and public transportation stop working. +The work week starts on Sunday morning. +Many cafes, restaurants and hotels have only kosher food, with no pork, seafood, fish with no scales, or dishes that combine milk with meat. +There is a wide selection of dishes from lamb and beef, soups and desserts cooked using coconut milk, traditional Jewish hummus paste, various sauces, falafel (balls made of ground chickpeas), fruits and vegetables. +The streets of Israel don't have homeless dogs. +But there are many well-fed cats, which walk around lazily. +In the evening, they can even be seen sleeping on roofs of parked cars. +These pussycats like busy places and do not refuse treats. +Car rental, depending on car type, costs from 37 (Hyundai Getz) to 188 (Audi A6, Volvo S80) dollars a day. +Plus insurance of 15 dollars a day. +Bike rental costs 15 shekels a day. +Museum entrance costs 30 shekels on average. +In numbers +In 2012, over three million tourists from around the world visited Israel. +Visitors and holidaymakers arrive mostly from the USA, Russia, France, Germany, Italy, England, and Ukraine. +Between January and October 2012 118,800 Ukrainian tourists visited the Holy Land, which is 51% more than a similar figure in 2010, before the removal of the visa regime on February 9, 2011. +Only the "high and mighty" make it to Moscow: migrants save money for language +While deputies and human rights activists argue about the purpose of the law on mandatory language testing, the country already has scam artists who sell fake certificates. +Every year, 13 million migrant workers come to Moscow, St. Petersburg and other cities in Russia. +Mostly these are citizens of Central Asia: Uzbekistan, Tajikistan and Turkmenistan. +Their only goal is to earn money to support families back home. +A new law came into effect on December 1, which obliges every migrant worker to pass a Russian language test. +For the moment, this law applies only to those who intend to work in services, housing and utility services, household services, and retail. 
+But with time - as promised by the Federal Migration Service - tests will become mandatory for all non-residents.
+In addition to language, Russian history and the basics of the legal system will be tested.
+Language knowledge will have to be confirmed both to receive and to extend a work permit.
+An exception is in effect only for citizens of countries where Russian is a state language.
+People who received education certificates and diplomas before the fall of the USSR in 1991 are also exempt under the law.
+Purpose, doomed fate, and the protection of rights
+Seven testing points will be operating under the auspices of the Pushkin Institute of Russian Language, Peoples' Friendship University of Russia, Moscow State University (MGU), St. Petersburg State University (SPbGU), and other Russian education institutions.
+Migrants can take the tests in all cities; more than 160 such centres have been opened.
+The initiative to introduce the testing was supported by State Duma members and the Federal Migration Service.
+But human rights activists asked the question repeatedly in the press before the law came into force: what will it actually achieve?
+What will the obligation to know Russian change for Russians and for non-residents?
+First of all, according to representatives of the migration service, this will make it possible to reduce the number of people suffering from labour slavery.
+Many speak about protecting the rights of migrant workers, explains the Head of the representative office of the Federal Migration Service of Russia, Viktor Sebelev.
+Rights protection should begin before their departure.
+Only a system of organized selection will enable us to solve 90% of the problems of foreign workers.
+Migrants without a profession or education, who do not know Russian and who do not have a medical certificate, start to have problems.
+If a migrant does not understand the language, says Sebelev with certainty, he is doomed to come across unscrupulous people who, pretending to help, will force upon him a "ticket" to terrible, cramped barracks where many others like him will suffer without food and documents, slaving away 12-14 hours a day.
+We receive many complaints from our migrants.
+"They are promised one thing at home, but when they arrive, they are lied to, their passports are taken, they are not paid what they were promised," confirms the Head of the Main Migrant Labour Administration of the Migration Service of Tajikistan, Tolib Sharipov.
+Not be angry, boss!
+Nonetheless, many citizens of Central Asian countries who plan to go to work in Russia admit that not only is their understanding of the language of the country where they are going poor, but they can barely write in their own language.
+Naturally, this is not so much their fault as a consequence of poverty: very few Turkmens, Uzbeks, and Tajiks can afford even a basic education.
+Their families don't even have food to feed their children, not to mention decent clothing, shoes, and supplies.
+After reaching adolescence, these kids go to work at the first opportunity.
+It is hard if your knowledge of the language is poor, they admit.
+"You feel humiliated and inferior."
+But human rights activists note one important point about the law on language.
+Testing will be conducted only for those migrants who have legal status.
+If they have no status, there will be no testing, nor any official work in the future.
+In the meantime, most of the migrant workers continue to live in Russia illegally.
+"Welcome, or No Unauthorized Entry" +Many of the foreigners assert that receiving official status in our country is not that easy. +The reason lies in bureaucratic hurdles and the already mentioned language difficulties. +In addition, legalization costs money: from 12,000 to 16,000 rubles. +Whereas a fake registration is done quickly and costs only one and a half thousand. +Officers of the Russian Police know that we mainly have fake papers, without registration, hence the extortion. +"They ask for a hundred or two for cigarettes, tea," Umed Khushkadamov, a citizen of Tajikistan, shared with journalists. +"Roll up, don't be cheap, get your artwork" +On the first day of the law's entry into effect it turned out that not only migrant registration documents can be fake. +A few forged certificates about passing language tests have been seized by Federal Migration Services officers already. +Forged documents are printed on a standard colour printer. +Naturally, they were not free for their owners: each of the migrants, who had hoped to facilitate the task of passing the tests in this way paid seven thousand rubles for them. +It is two and a half times more than the process of official testing, which costs three thousand. +Government officials and human rights activists agree that the main goal in the near future is to protect the system from corruption, so that the certificates could not just be bought. +For the moment, the authorities can promise migrant workers who could not pass the test the first time to give time to complete a basic language course. +In addition, those who come without Russian language knowledge will be offered work in areas that do not require active communication with people. +The Ministry of the Interior does not put arms from the illegal market back into circulation +The share of crime involving legal weapons is extremely low +The Russian Ministry of the Interior is proposing to toughen up the law for owners of civil weapons. +This is the reaction of authorities to recent incidents: CLICK shots at weddings, where there were no casualties, and the massacre staged by Moscow lawyer Dmitry Vinogradov, resulting in CLICK the death of seven people. +Policemen want to prohibit the carrying of weapons in public places and raise the legal age of weapons licensing from 18 to 21. +The idea was supported by the head of the Duma Committee on Safety and Anti-Corruption, Irina Yarovaya, who promised that the amendments to the law on weapons will be brought to the State Duma in the near future. +Not everyone is happy that the Russian authorities are trying to fight the problem by "tightening the screws." +An open letter appeared online, whose authors - representatives of different social rifle organizations - demand to abandon the "senseless toughening." +The percentage of crime involving registered weapons is minimal, said criminal lawyer Vasily Lesnikov to BBC Russia. +According to the Ministry of the Interior's statistics, 142 crimes using firearms registered with law enforcement agencies have been committed in the six months of 2012, whereas 1,168,000 crimes have been recorded in total for this period. +Authors of the open letter are certain that the toughening of the law in the area of civil weapons will not prevent the criminal from going to the "black" market. +According to them, one can find any weapon at a low price right now. +Nonetheless, the Ministry of the Interior asserts that the situation of the spread of illegal arms is under control. 
+Suppliers: from plants to officers +The "black" market of weapons is replenished through several channels. +There are five such channels, explains retired colonel Viktor Baranets, who has worked in the Ministry of Education and the General Staff for 10 years. +Screenshot of the site that accepts orders for weapons. +First: "army or military loot," i.e. weapons that were stolen during the fighting in the Caucasus. +"Weapons were stolen by Russian officers and by the Caucasians," says Baranets. +Next are "black weapons," stolen by criminals from representatives of defence agencies. +Baranets explains that this covers weapons taken from police warehouses and those stolen directly from law enforcement agencies' employees. +Illegal arms are taken to be sold from military warehouses. +Explosions have often been heard at military warehouses. +"There are proven theories that some of the fires were intentional, in order to cover the shortage," says the former military man. +Manufacturers of weapons make their contribution, according to Baranets. +"There are so many private weapons factories now, which do not endure competition on the international market and throw weapons from under the counter to the black market, including in Moscow," says the expert. +Another source of the "black" market is trafficking. +An especially high number of guns and machine guns come from poor countries like Kyrgyzstan. +"There's production there, sometimes handmade; and a mafia has formed, which has organized a stream," explains the former military man. +Where do the weapons come from? +Experts counted the approximate share of each of the sources of supply of illegal weapons to the "black" market. +A report about this was prepared by the Centre of Problems Analysis and Public Management Planning in 2011. +Experts analysed the reports of the Department of the Interior and Rosstat, criminology literature and open data from portals on weapons. +The overwhelming majority of illegal weapons, according to the researchers, comes from the military and security forces. +Half of all arms on the black market are there "because of officials, whose work is connected with weapons," states the report. +According to researchers' data, 17% of the time the weapons are received from armed conflict areas, 14% is theft during production, 5% is "black archaeology." +A sales consultant of one of the weapons stores, who wished to remain anonymous, asserts that the weapons found by "black" diggers are not being bought any more, because they're too old. +According to him, dealers go to the military warehouse for a new batch of goods. +One piece, for example a TT gun can be bought from a warrant officer. +It is issued to him, and given through the fence. +"He takes it to the city and sells for 900 euros a piece with two magazines," he says. +"The truth is that police are aware of everything, that is why periodically, when the crime detection rate is low, it conducts test purchases from illegal weapons merchants," says the consultant. +"Like in a luxury store" +The buyer and seller often find each other through friends. +I looked at sites, blogs, till someone responded, offering me to go to Begovaya station, where a man will be waiting for me to take me to the "corner" so we can negotiate. +I found out the price of the weapon only there +military commentator Viktor Baranets +To get a weapon, I need someone with connections, says the sales consultant. - I have an acquaintance, but I'm not sure it's reliable. 
+There are salesmen on labour markets, but one needs to "come" there conditionally "from John Doe, who asked to tell that his daughter lost a tooth." +Right now, even if I need a few knuckledusters, I get them through someone I trust. +He also supplies them only to me, because he knows that I won't give him away. +Beginners look for weapons in different ways. +Former military man Viktor Baranets tried himself as a buyer of illegal weapons in the mid-1990's, when he was preparing to publish an article about this. +The formulas are still the same, according to him. +He was given an album of pictures with "anything and everything." +"I felt like I was in a luxury store," he recalls. +According to Baranets, the buyer is not offered a pig in a poke - you can try out everything. +I, the potential client, am not just buying; we go to the forest with the seller and set a target there. +"I am given the opportunity to shoot, and when I am certain that the weapon is good, we begin to negotiate," describes the expert. +Store on a sofa +Internet searches lead to sites and "Vkontakte" groups, where weapons "for different purposes" are on offer. +No documents or personal meetings are needed. +"It's enough to have a certain sum of money," says the advertisement heading on the website "Buy a pistol or rifle." +Users leave their requests and ask questions. +Can a minor buy? +"Without a license, of course," asks user "John" (name is changed). +"Want to buy a TT, Moscow," concisely requests "Fedorenkov." +Federal Security Service now spread a big network of fake sites and there are tons of potential buyers of military weapons. +People come like hungry fish to bait, and then mine coal in Siberia. +military commentator and former military man Viktor Baranets +I heard about this: normally the site is registered outside the area of applicability of the laws of Russia. +People accept orders. +The buyer pays at an ATM. +"In response, a photo is sent with instructions on where the weapon is hidden," says Press Secretary of the Rights to Weapons non-governmental organization Dmitry Kislov. +Viktor Baranets confirms that after leaving a request on the site you can stay without a weapon and go to jail. +The Federal Security Service now spreads a big network of fake sites and there are tons of potential buyers of military weapons. +"People are like hungry fish after bait, and end in Siberia mining coal," - he says. +Makarov for 100 dollars +When buying illegal firearms, 100 to 900 dollars is enough according to experts. +According to Dmitry Kislov from the Rights to Weapons organization, a Makarov gun can be acquired for 100-300 dollars. +The wait time is a month to a month and a half. +It is shipped from long-term storage warehouses by the mid-level management of these warehouses. +According to official statistics of the authorities, the number of such crimes in Russia on the whole dropped 7% as compared to January-October 2011, amounting to 22,900, while the number of cases of theft and extortion of weapons, ammunition, explosive substances and explosive devices dropped by 7.8%. +Fast-food and supermarket workers are on strike in the U.S.A. +Up to a fourth of all American teenagers have worked the cash register at McDonald's at one time or another +In the last few days, there is a wave of protest actions in the U.S.A. against low salaries in supermarkets of the Walmart chain and popular fast food chain restaurants like McDonald's, Burger King, Taco Bell, Wendy's and Kentucky Fried Chicken. 
+Right now, nobody is able to predict whether this wave will turn into the ninth wave or it is destined to fizzle out early. +Actions are being supported by unions and a series of left-wing organizations. +In addition to increasing the low wages received by employees of Walmart and fast food chains, the goal of the protesters is to create unions within them. +This sector of the economy is not covered by any union movement yet. +46 cents a year? +Actions began last week after Thanksgiving, on Black Friday, when massive sales drew millions of people in America, sometimes accompanied by clashes. +On this day, some employees of the Walmart corporation, which employs 2.2 million people around the world, left their workplaces and picketed together with the unions and left-wing activists from the corporation stores that sell products to people on low-to-medium incomes. +Walmart sells everything imaginable, from diapers, hunting rifles and car batteries, to vacuum cleaners, eggs and milk. +Products in its stores are on average 8% to 27% cheaper than in major supermarkets. +So many low-paid Walmart employees shop only at their workplace. +Availability and assortment made Walmart one of the biggest American corporations. +According to critics, Walmart can afford to sell the products cheaply partly because it pays little to its employees. +These latter also complain about hard work conditions, for example lack of lift trucks and hand-held scanners. +Protesters on Black Friday demanded a salary increase and complained that the cost of medical insurance provided by the corporation went from 30 to 100 dollars a month. +A typical Walmart employee, receiving 9.5 dollars/hour, cannot afford this. +Scientists from the Berkeley University in California argue that if Walmart raises the average salary to 12 dollars/hour, it will cost the corporation 3.2 billion dollars. +This is about 1.1% more than it spends on salaries right now. +If Walmart fully shifts the cost of increasing wages to the shoulders of consumers, each visit to the store will cost only 46 cents more. +In one year, they will only spend 12.39 dollars more than now. +Walmart supporters happily note that the protests took place in nine states and did not cause any damage at all to the corporation. +Black Friday continued in its stores from 8 in the evening on Thursday till midnight the next day, and during the period Walmart sold about 5000 products a second. +In total, its cash registers conducted nearly 100 million transactions on Black Friday. +Representative of the corporation, Dan Fogelman, asserted in an interview with a left-wing site, the Huffington Post, that a total of "less than five" Walmart employees left the workplace, and the protest act was just "another PR trick" of the union that organized it. +"Free cash register!" +Protests continued this week in New York, where their object was not Walmart (they're not so welcome in the progressive city, that is why they don't exist here yet), but McDonald's and other cheap restaurants. +McDonald's says that it sells billions of portions, and despite this it doesn't even give you sick days or pay you for honest work! +Jumaane Williams, member of the City Council of New York +At the moment, the minimum salary according to federal and NY law is 7.25 dollars an hour. +Fast food restaurants increase it with time, but very little. On average their ordinary employees in New York earn 8.90 dollars/hour. +Nobody earns less in this expensive city. 
+I cannot understand how one can survive in New York on this money. +Once upon a time, almost a fourth of American teenagers went through McDonald's, working part-time after school, living with parents. +Few saw this as a source of living or planned to stay there for long. +Now I continuously come across interviews with McDonald's employees, who complain that they have to survive on this salary and sometimes even feed their children. +On the other hand, there is a comment on the Wall Street Journal forum, whose author notes that it is irresponsible to have children if you do not know how you will feed them. +Participants of the protest that began at 6.30 a.m. on Thursday near the McDonald's on 40th street and Madison Avenue demanded that cashiers and cooks of the fast food chain be paid at least 15 dollars/hour, i.e. more than double their present wages. +They also demanded the creation of unions in the fast food industry. +American law prohibits the administration from preventing this or punishing activists of the union movement by nagging or firing. +On the other hand, the administration does not often ease their life. +But for objective reasons it is hard to cover fast food with a union. +One of them is the unusual turnover of employees. +Disagreeing +Noisy protests began on this day in a number of other cheap restaurants in Manhattan. +The highlight of the action was the afternoon meeting near McDonald's by Times Square, where several local democratic politicians spoke out. One of them, Jumaane Williams, said: "McDonald's claims it sells billions of portions, and despite this it doesn't even give you sick days or pay you for honest work!" +Demonstrators were supported by other prominent NY democrats, like Bill de Blasio, a candidate for NY city mayor, who said: "We need to voice our joint support for the fast food employees, so that they can achieve fair wages and economic wellbeing, which every New Yorker deserves!." +According to the New York Times, this was the biggest action of this kind in the history of the American fast food industry. +But only a few hundred people took part in it, and many of them were not fast food employees, which comprise tens of thousands of people in New York. +It is unclear right now whether this will spark a mass movement. +"At the moment, the mind cannot be deceived too well" +Among modern technology fans a popular topic is augmented reality, lately seen primarily through the prism of special glasses. +At first, a functional model was shown by Google in the summer, at its annual conference. Then, in November, it was announced that Microsoft filed an application for patent too. +However, according to the conversation with the leader of the group of interactive 3D technologies in the Cambridge laboratory of Microsoft, Shahram Izadi, glasses are a thing of the past for scientists in this company. +They are drawn by the prospect of manipulating virtual objects in the air with bare hands, creating virtual open spaces. +- Please tell us, in simple terms, about the work your research group does. +- We work on the interaction of people with machines, at the same time trying to expand the boundaries of this interaction. +While people in general are stuck at working with pixels on a flat screen and sometimes pointing fingers at them. +We want to look 5-10 years into the future and predict cardinal changes in this interaction. +For example, Xbox and Kinect sensors are a step forward. 
Almost no Xbox is sold without Kinect today, because everyone likes control by gestures. +- What else awaits us in the future? +- Despite the fact that Kinect shifted the interaction to the physical level, much still occurs on a flat screen, sometimes in 3D. +Information entry has improved (the system receives more data), but output still needs to get better. +We are trying to change this, working on truly three-dimensional display systems based on various technologies, including projection technologies. +We need to let the computer world into our physical world, make it more tangible. +But for this, we need to identify both the user and the space around him. +Then we will be able to supplement the real world with virtual objects in a much more convenient form. +Above all, get rid of these stupid virtual reality helmets! +- What do you think about voice control? +It's a popular thing, but is it overestimated? +- It clearly cannot be called a cure-for-all - there's a question of privacy, because we do not always want to let the others know about our actions and intentions. +In reality, all types of interaction with computers are good, but each in their own niche. +For example, we had a project to control devices in public places, in which we thought about movements, not wide movements, but small, reserved ones. +Movements were not recorded by a camera, but by a hand bracelet that determined the movement of bones and muscles. +It's big right now, but in theory it can be reduced to the size of a hand watch. +In general, the future lies in the mixed control, e.g. movement + voice. +- What do you mean? +- For example, how would you ask me to give you this bottle of water? +You will talk and show at the same time. +- Usually I just say. +- Oh, that will be very hard to detect. +- So you want to make the users adapt to what the machine can or cannot do at that moment? +- Not necessarily, but it is a mutual approximation. +I think in the near future, we will mainly work on developing new sensors that will enable more precise determination of a person's reaction. +This could be, e.g. laser sensors. They have a decent depth resolution, which is very important. +- If we talk about your work with Xbox Kinect sensors, what are your complaints about modern cameras? +Not enough resolution, depth or something else? +- In general, the current generation is what we can base ourselves on in working on three-dimensional recognition. +Of course, it would be good to have eight mega pixels with 1000 k/s speed. +It's not just the mega pixels, though, but the quality of the matrix and the depth. +From the latter point of view, all current technologies are not good enough for us - this adds work to the algorithm designers. +So it's important to remember about the resolution on the X, Y, but also the Z axis. +Speed, the number of images per second, is also very important. +Human movements are relatively dynamic, and the current 30 k/s is really not enough, especially for gestures. +Steven Bathiche from our Redmond laboratory created a touch sensor with a regulated processing delay from 1 to 100 ms, while modern serial sensors are closer to the latter indicator (60-100). +Not everyone understands how this affects the interaction between man and machine. +In my work, it would be very useful to have a device that does not require touching and would have more images per second. +- Does the number of cameras need to be increased? 
+- In Kinect there are three cameras now, one of which is actually an infrared emitter and the second one, the recipient of the signal. +The third one is actually a regular sensor of visible range. +It is not applied to determine the object's depth. +Potentially, a large number of cameras could solve the problem... +Or make it worse, by increasing the required volume of calculations. +It would be nice to create a flexible analogue Kinect, play with the flexion of camera disposition and see how this will help in three-dimensional determination of the position. +- As far as I remember, Microsoft did not present its glasses to the public, unlike Google. +Don't you think this is one of the most promising platforms from the point of view the everyday use of augmented reality technologies? +- Certainly it is not very convenient to walk around with a smart phone in your raised hands all the time, but I think that the coolest option would be "transitional" augmented reality, where you could shift from glasses to smart phone, projection display, and everywhere else based on a cloud platform. +Glasses are a very personal device, that is their strength (private things are seen only by you) and, at the same time, their weakness - augmented reality based on glasses will not allow you to work on virtual objects together with other people. +- Let us imagine for a minute that manipulation of virtual holographic objects in the air is available not only to Tony Stark from Ironman, but to a regular person. +There is one problem with this idea that the critics often point out: no tactile feedback! +Hands feel nothing! +What answers does your group prepare to this challenge? +- In my lectures I often say that augmented reality is the seventh consecutive attempt at the interaction between man and machine. +I think that the eighth will probably be the addition of tactile sensations. +For now, one of the interesting tricks is to use the second hand as a sort of matrix for the image. +It is great at registering pushes! +But there are technologies that are really aimed at giving these "images in the air" a sense of tangibility, for example, the interference of several targeted ultrasound rays in a specific point where the finger is located gives a sensation, but very weak right now, as if someone blew on your fingertip. +There are also wrist bracelets that affect the nerve endings in fingers, which is also a promising area. +- Have you tried to deceive the mind? +To force it to think that it feels something that it should be feeling when it sees something? +- This is a good idea and we haven't tried this yet. +It conceals one challenge that will not be solved so quickly - how to force a person, who is physically in a very limited space to believe that he is walking along an open, almost limitless space; we are working on the concept of treadmills (not at all like in clubs), moving platforms, and giant balloons. +So far deceiving the mind has had limited success, there's work for many years to come. +That's what makes working on virtual reality so attractive to researchers - many things are in their very beginnings. +Judgement calls instead of culture - Rosbalt.ru +Rosbalt continues the project St. Petersburg Avant-garde, dedicated to residents who are ahead, in the avant-garde of culture and art. +This top list already includes outstanding figures of the art scene of St. Petersburg, whose achievements reach beyond the scope of the city, often recognized in Europe, bypassing fame in Russia. 
+The new player in Rosbalt - the bold artist Kirill Miller. +The whole city knows Kirill Miller, a bearded man dressed all in red, who can be seen by the Russian Museum, or by the Summer Garden, or at fashionable parties and shows. +Kirill Miller's work always brings in crowds of people, no matter where they are exhibited. +Kirill Miller is one of the purely St. Petersburg social and philosophical storytellers and creators of new mythology. +Kirill Miller is an outstanding man of the St. Petersburg avant-garde of the late 80's early 90's. +Moreover, he is a city man, who makes people smile on the street and lifts up everyone's spirit. +Recently he took up the street organ and became St. Petersburg's music man, because he was ready for this complex role with all his Bohemian existence, philosophy and image. +- Kirill, why do you walk around the city all in red, not yellow or turquoise, for example? +- I chose the colour red as a fashion designer engaged in look and image. +In this world, red is a compromise between artist, image-maker and society. +Although in society, everything that is not grey causes aggression and agitation of the bad kind. +But my provocations are aimed at starting conversation. +The whole history of my provocative actions is an invitation to discussion. +- When did you realise that you must be an artist? +- At an exhibition in the Nevsky House of Culture, where my work was displayed. +It became clear to me that this is my path. +Then, the wave of older free, unofficial artists was gone, while new, free artists like me were not understood. +I became friends with the artists of the new wave, with post-Gaza-Nevsky style artists ("post-gazonevschina"), which led to Pushkinskaya-10, and the wave was no longer. +I'm drawn to theatre, clothing, music, all genres except for literature. +- And all this has been united in your Art-clinic... - It was important for me to find myself in the centre of the culture of St. Petersburg, where all the best creative forces should come together. +In 1995, I occupied the territory on Pushkinskaya-10, and while the renovation work had not started, there was a musical and creative club, a Bohemian club, the house of the St. Petersburg Bohemia. +Many were born there: NOMy, Tequila Jazz, I remember when Shnur was brought there with the Van Gogh's Ear project. +Shnur and his friends lip sang easy songs, wearing tight leotards, and the now trendy composer Igor Vdovin was with them. +When the group began to play live, it became Leningrad. +Trakhtenberg was the presenter of many programs before Hali-Gali times. +We gave them Trakhtenberg, and a great career was on its way, but the basic education and mentoring he received from us. +Gallery D 137, Griboyedov club - all these echo the Art-clinic. +That is where our staff and regular customers left for. +I am a hero of the last century, when culture meant something. +In 2000, there was a poll in the press, for the People of Our City prize. +I was nominated Artist of the Year, my climax came to an end. +In the new times, it is uncomfortable to work by old rules. I'm a man of truth, honesty and culture of the last century. +In our time, it is easy to become popular, but culture and popularity are different. You can be popular, but not very cultural. +- Your work is marked by a recognizable style. +- Many of my works are hits, with clearly reflected relevance and acuity. +I will have a programme exhibit, "Russian museum in clowns." +Clowns are a timeless category. 
+I was social before, now it is painful and scary to be like that. +But everything is blurred in clowns, tragedy is removed. +I like grotesque, I have grotesque ideas. +For example, saving the world by totalitarian changing of clothes by order. +Nowadays, people are judged by appearance, not their inner qualities. +Who knows, maybe you cannot shake his hand, and need to spit in his face. +And the lie will go away with the help of changing clothes. +- Recently we saw you in the role of music man. - A cultural city should have such a character. +Who fits the role better than I? +- Maybe commercial art can also be beautiful? +- Nowadays, commercial art should be neat, considerate, sweet. +There is a disintegration of cultures. +People used to get together in flocks, Bohemians liked one thing, the simple people, something else. +Now, everybody is divided into micro societies, it's hard to be liked by everyone. +I am not a hundred dollar bill to please all. +Now you have to think who you will please. +Now, each cult hero has 100 fans. +- But several thousand come to Stas Mikhailov! +- The cast-outs go to see him, the sexual-social sphere is at work there. +But 300 people will come for culture, not 10,000. In the end, there's less management, money, everything dies out. +I have fans; the main thing is not to betray them, not to spoil what I have earned. +In my youth, I painted such art that one collector had it hanging on the same wall with Falk and Larionov. +I started with paintings, which people usually end with. +Concepts are often mixed up these days. +People say: spiritual culture, consumer culture. +There is no culture in consumerism, it's "from another opera." +I am a man of yesterday's culture. I grew up on examples of artists who lived poor and died in poverty, refused money for the sake of painting. +This is the culture I'm for. +- Kirill, what is St. Petersburg missing? +- Good cultural experts. +There is such a thing: an official for culture. +But not everyone can be engaged in culture. +Under the right rulers everything was different. Kings may not have understood culture very well, but they understood that they needed to stick with the right experts. +There are good consultants in Moscow right now. +Here in St. Petersburg, there are people who could be experts, but they are pushed to the side, because more advanced experts are needed, who will correctly evaluate these experts and give way to them. +Judgement calls are what thrive now. +Even Erart, but they're different because they say honestly that we don't accept all modern art. There are some artists, who need to find other museums for themselves. +- What does St. Petersburg mean to you? +- St. Petersburg is not a cultural capital, Moscow has much more culture, there is bedrock there. +It's hard for art to grow on our rocks. +We need cultural bedrock, but we now have more writers than readers. This is wrong. +In Europe, there are many curious people, who go to art exhibits, concerts. +Here, this layer is thin. +We need to make art fashionable, as it was in the beginning of last century. +The project is supported by the St. Petersburg grant. +Give birth in space +The earth is in danger. +Global warming or an encounter with a killer asteroid. +Caravans of cosmic ships with humans on board leave in search of a replacement planet. +To save humanity, the question is how to propagate our race in conditions of weightlessness or on that replacement planet? +I think the choice is small. 
+There are only two actual planets that can be explored even hypothetically.
+"Venus and Mars," says Vladimir Surdin, Senior Researcher at the P.K. Shternberg State Astronomy Institute (GAISh).
+But while conditions on Mars are more appropriate for life, Venus has 500-degree temperatures.
+Life is possible only at a high altitude or in the orbit of Venus... in space.
+The question of reproduction in space began with flora.
+Half a century ago, experiments were run on plants.
+Four generations of peas grown in orbit were no different from their earth counterparts.
+Then, insects were bred in orbit, small fruit flies.
+In 1979, quail eggs were sent into space to check how an embryo develops in weightlessness.
+The result was an absolutely normal chick.
+But then the problem began.
+"The problem is related to the fact that this chick needs to find support, needs to get on its feet and start moving," explains Vladimir Sychev, head of the laboratory at the Institute of Medical and Biological Problems (IMBP) RAN.
+Having found no support, the chicks tumbled around in disorder.
+After 10 hours, the newborns experienced complete atrophy of instincts.
+The chicks did not react to light or sound.
+And the problem was that they simply died after four days.
+"We bred chicks twice there, and then stopped, because it is impossible to work with them there," says Vladimir Sychev, confirming the failure of the experiment with chicks in space.
+The last biological "mini-ark" with animals flew into orbit 16 years ago.
+In spring 2013, experiments will continue.
+However, only same-sex creatures will be on board the Bion bio-satellite.
+There was an experiment with rats, which were sent into space carrying foetuses.
+In principle, there was nothing extraordinary there.
+"This was on bio-satellites, but again, it was a singular experiment and such research needs to be conducted," says Vladimir Sychev.
+After landing, the space rats had babies.
+But it's hard to solve the problem of reproduction directly in space.
+It's not an easy task.
+Animals simply cannot follow their sexual instinct when they're out of their familiar environment.
+In principle, people, unlike animals, can.
+Homo sapiens have abstract thinking, and are able to create a fitting emotional background.
+Such experiments are not conducted for ethical reasons.
+But women have been flying into space for 50 years.
+The biggest risk was for Tereshkova.
+The most valuable thing for humanity is the female body.
+Our "Seagull" left, and nobody on earth could tell whether she would be OK after flying into space.
+Or whether she would be able to give birth after the flight.
+"Nobody answered this question," says rocket and space industry veteran Vakhtang Vachnadze.
+In June 1964, only a year after flying into space, the first woman in space, Valentina Tereshkova, gave birth to a daughter.
+The child's father, Andrian Nikolaev, was also a cosmonaut.
+In 1988, the second woman cosmonaut, Svetlana Savitskaya, who went into orbit twice and even worked in open space, gave birth to a son.
+However, the risk remains.
+We have few, very few cosmonauts who were OK and had healthy children after long flights.
+"What's more, it is dangerous even for orbital flights," adds pilot and cosmonaut, Hero of the USSR, Hero of Russia, Valery Poliakov.
+And yet, humanity needs to seek out new avenues in biotechnology, protection from radiation and the creation of artificial gravity.
+The hydro-laboratory of the CPK [Cosmonaut Training Centre, Editor's note] is a mandatory phase of training for a flight.
+Here, cosmonauts practice skills of working in open space in zero-gravity conditions. +Water imitates weightlessness. +If for adults water is a foreign medium, although comfortable, for infants it is a native element. +Small amphibians seem to confirm that life came to land from the ocean. +There is a connection with the fact that an infant spends about 9 months in amniotic fluid in the womb; it is easier to get used to water after that. +In principle, it is logical, because only two weeks pass from birth until the first bathing. +"This is very little time to forget something," says infant swimming instructor Marina Aksenova. +In other words, if for a newborn weightlessness is more natural, a woman needs gravity, earth's pull. +Stomach and pelvic muscles usually quickly degenerate in weightlessness; the ability to push out the embryo is reduced. +Well, let's assume that childbirth stimulators will work out. +Maybe she will push out the baby in a special room. +"Then what?" - asks Valery Poliakov about this non-trivial issue. +On the other hand, a baby also needs artificial gravity. +When a body does not feel the earth's pull, it does not form the skeletal and muscular system. +It is not possible to dress a newborn in orbit into a special loading suit for training, as they do with adults. +He will simply not have what he needs to survive. +"And this experiment, that we will go for by allowing the birth of a child in a foreign environment, will lead to us bringing a handicapped, completely unadapted human to earth," predicts Chairman of the Committee on Bioethics IMBP RAN Igor Pestov. +For the moment, birth of children in space is just a theory. +However, with time, it will become reality, when earthlings will go to a faraway planet in their ships, and it will become the home for their offspring, who were born in space. +NKU Head: Svarc System audit has failed because of politicians. +The Czech Republic has sound control bodies and a good standard of legislation when it comes to public contracts, but it lags behind in their application. +This was said by Miloslav Kala, vice-president of the Supreme Audit Office (NKU) in an interview for Aktualne.cz. +"The Law will never be perfect, but its application should be just - this is what we are missing, in my opinion," states Kala, commenting on the current situation. +Similar conclusions are also reached by the joint audit from the Czech and German auditors. +As an example of improper practice, they cite Petr Necas's approach to the so-called "Svarc System." +The Prime Minister recently claimed that the ODS will not be burdening business owners with its checks - so is it forbidden or allowed? +"The Law must be set out one way or the other and if it prohibits something, then even the Government's head cannot prevent the work of its department, which is duty-bound to monitor and enforce," asserts Kala. +At the audit committee's session in the House of Deputies, you spoke about a joint project between the Czech Republic and Germany, within which legislation relating to public contracts in both countries was compared. +What exactly was this about? +This is about parallel auditing, which we began around two years ago. +Simply put, this is about how European legislation governs the handling of public contracts, followed by individual state legislations and then the actual practice itself. 
+We brought all this together, and although the audit is not yet complete, some very interesting differences have become apparent - in general terms, our legislation might be even "more concise and complete," however the actual practice is in certain aspects better in Germany. +This confirms that creating more and more concise rules is not enough, and that attention must be paid to the actual application of these laws. +What does this project actually help you with, and what do you think its outcome will bring? +This kind of joint audit could contribute to curtailing these efforts to specify our law, to reduce and perfect boundaries, when it does not have such a positive impact. +Economy means acquiring the required thing at a reasonable (which does not always mean the lowest) price, so that profiteering and possible criminal proceedings may be avoided. +However, just because we have reduced the order limits, does not mean something will be procured. +The system might become overloaded with the amount of paperwork, and those, who wish to look for loopholes in it, will be able to take advantage far more easily than if the limits had remained higher. +These are domestic problems about the practical implementation of legislation relating to public contracts. +How does the audit system work in Germany? +Is there an office like the NKU, or is it organised differently? +As far as the office is concerned, the Bundesrechnungshof functions like our NKU, and it is organised like ours, it also has a committee although it is appointed slightly differently, but basically both offices operate similarly. +Powers are also similar to a degree, though of course Germany is organised federally, so these courts of auditors are also at the member state levels - in this respect their system slightly differs from our own. +The BRH can only audit federal money, known to us as state funds. +Public funds, which, for us, are administered by regional and municipal authorities, are audited by the federal courts of auditors there. +When it comes to their legislation, is it more straightforward than ours? +Overall, I would not like to make a comparison without any specific data, nevertheless in certain respects Germany serves as an example, but it certainly cannot be said that it is better in every aspect. +Is this because, perhaps, they have better enforcement? +That is certainly not true, but again, I prefer not to make such comparisons. +It should be said that even in a country we perceive as exemplary, they encounter a whole range of problems. +If that were not the case, they would gain nothing from working with our office, would they? +Coming back to domestic legislation, what did the amendment to public contracts legislation mean for your office, is its impact being felt already? +The period since the amendment came into force has been quite short, so it has not manifested itself in our audit work yet. +Since we carry out our audits ex-post, a certain delay has to be taken into account. +As yet, we have only observed it within the process of preparing future audits - we have launched our new "fiscal failure risk detection" system, with which we have processed almost 14 thousand public contracts, and these have been analysed - that is where changes will clearly be seen, because of the changed limits, the adjusted conditions governing certain types of selection processes, and so on. +So do you see the adoption of this legislation as a benefit, or rather as another burden on the bureaucratic system? 
+I believe this legislation is a step in the right direction, and I hope this will be confirmed. +A problem, which may arise here, is that the law becomes "too constrained" and will not be enforceable. +Under the previous rules, parties being audited were already bound by their audit provider (for example, in the case of regional operational programmes, the regional office) to the fact that every infringement of public contracts law means a breach of budgetary discipline. +Is it worth constraining the law in this way, in that case? +I do not think this is the way. +The system should prevent those who want to attack and abuse it, but not penalise those, who make a mistake on a technicality, which does not affect the final decision. +This kind of system will only increase pressure on bureaucracy. +So how can we get out of this? +Let's see where this dead-end takes us. +The Prime Minister recently said the ODS will not be burdening businessmen with audits of the so-called "Svarc System" - what does this mean? +Is the Svarc System prohibited or allowed? +The Law must be set out one way or the other, and if it prohibits something, then even the Government's head cannot prevent the work of its department, which is duty-bound to monitor and enforce. +He may say: "Let us change this law and relax it," but he cannot say we should pretend it is not there. +The law on public contracts has relatively strict rules about the formalities which must be adhered to - which is the right way to ensure public tenders are protected. +On the other hand, it is a tragedy, when a bidder with the best offer is excluded on a technicality. +The Law will never be perfect, but its application should be just - this is what we are missing, in my opinion. +Roads are icy in places, but mostly passable. +In several places in the Czech Republic, the main roads are icy and snowy. +However, the majority of roads are passable, with extra care needed in places. +Carlsbad region +In the Carlsbad region, the roads have been usable this morning, though in some places they were icy and snowy. +The temperature has dropped to between five and ten degrees below zero, though it is expected to get warm slightly during the day. +Snowing in the region has stopped, and only a thin layer of snow remains in the lowlands. +However, the ridges of the Krusne Mountains have around 30 centimetres of snow. +In some locations there is limited visibility due to mist, according to the local highway service. +The R6 high-speed motorway and primary roads in the region are now usable without restriction. +Caution is, of course, appropriate, for example, on certain bridges, where the surface can be icy and slippery. +All secondary and tertiary roads are also passable, including mountain roads. +In certain stretches of these roads there might be remaining frozen and compacted snow patches. +Above all, at higher levels, extra care should be taken while driving. +Pardubice and Hradec Kralove region +On some roads in Eastern Bohemia, there might be a risk of black ice, at higher altitudes and in the mountains there might be a layer of compacted snow, according to the Road and Motorway Directorate. +The highway service is warning the drivers against black ice, which might occur at higher altitudes of the Pardubice region in particular. +Black ice may occur around Lanskroun, Usti nad Orlici, Policky, Svitavy, and Vysoke Myto, and particularly on secondary and tertiary roads. +The I/43 and I/34 roads have been chemically treated around Svitavy. 
+Snow is particularly affecting the roads in the Krkonose and Orlicke mountains. +At higher altitudes, there is a compacted snow layer on the roads around Rychnov nad Kneznou and Trutnov. +In Eastern Bohemia the day will be mostly clear to partly cloudy, and dry. +Temperatures will be between minus three and plus one degree Celsius mostly, with a light wind. +Pilsen region +The roads in the Pilsen region have been usable this morning, with extra care needed in some places. Drivers should take the weather conditions into account. +The morning will be frosty, with temperatures ranging between three and nine degrees below zero. +Due to the existing snow and subsequent drop in temperature, certain roads may be icy. +Drivers should expect mist in places, though visibility will gradually improve. +This information was reported by the region's highway service. +The D5 motorway is drivable almost without restriction, but the road services recommend extra caution between the 80th and 131st kilometre marks. +Most primary road surfaces are dry and frost-free. +Southern areas of the Pilsen and Tachov regions may have icy patches. +Secondary and tertiary roads are wet, and may therefore also have icy patches. +Drivers should be cautious especially on less frequented roads in the Bohemian Forest. +Olomouc region +Drivers should expect snow slush on the roads if heading for the higher parts of the Olomouc region. +It is a result of the chemical treatment carried out at Cervenohorkse sedlo and on the way to Videlsky Kriz. +Snowploughs were brought out by falling snow overnight, the Sumperk region, according to highway maintenance, got around three centimetres of snow. +In other parts of the region, roads are mainly passable without restrictions. +"In the Sumperk region, traces of snow have remained at the highest altitudes. +Drivers should expect snow slush at Cervenohorske sedlo in the direction of Jesenik," the dispatch officer for the Sumperk highway service told CTK today. +Their Jesenik counterparts also made an outing overnight; the roads all the way to the highest altitudes are now clear and wet following the chemical treatment, according to them. +The Olomouc region's roads are usable without restriction, while in the area of Sternberk drivers should beware in wooded areas, where roads have remained wet. +Usti nad Labem region, Liberec region +Since this morning, the snowploughs have reported several places, which are difficult to pass in northern Bohemia. +Besides certain snow-covered places, or some icy frost patches, the mountain road from Telnice to Kninice in the Usti nad Labem region is also closed, according to the police database. +Temperatures remain below zero and roads are likely to remain snowy and icy. In the lowlands, however, particularly southeast of the Central Bohemian Uplands, there are no problems and roads are mostly dry. +No traffic hold-ups have so far been reported. +Icy frost patches have been reported in particular by road maintenance around Steti. +According to meteorologists the conditions for this were perfect - rain and melting snow during the day, with a clear night and freezing temperatures. +Adverse conditions are expected on the main sections of the I/13 road between the Usti nad Labem and Liberec regions. +The closure of the Telnice to Kninice road was caused by bent tree branches, which were weighed down to road level by snowfall. 
+Simon Ornest: At the concerts we want a fusion of positive energy +What is your opinion on the end of the world that might come in less than a month? +It is just another startler, which we like to latch on to. +Together with The Tap Tap band, we tend to joke about it, saying that we might be the only band on earth that could draw enough positive energy to hold off or avert the end of the world completely. +In December you are even organising a unique series of three concerts against the end of the world. +Can you give our readers some details on this? +This is a nationwide fund-raising event, which we have been planning for the past two years. +We decided to make use of the marketing potential of the end of the Mayan calendar, due on the 21st of December at 11:10 a.m. +On the eve, the 20th of December, at 9pm, 3 concerts will take place in parallel in Prague, Brno, and Ostrava. +They will end at around the time when Kiribati Island in the Pacific, which is 12 hours ahead of us, reaches the end of the Mayan calendar. +Who came up with this idea? +Initially it was probably my idea, later we worked all the details out with our designer, Honza Augusta. +Apart from the fact that we want to collect enough positive energy to stop the end of the world, we also want to allow ourselves and the public to spare some thoughts for the state of our planet, when we, one day, hand it over to our children. +On the occasion of the end of the Mayan calendar, we have also prepared a range of unique items, shoes, t-shirts, bags, and original keys against the end of the world, which can be purchased at www.e-tap.cz to support our cause. +The Tap Tap band, together with other artists, also recorded the so-called anthem against the end of the world, called "The End of the World is cancelled." +It is already well received on YouTube, will it figure at the fund-raising concerts? +Of course, for the grand finale, as long as the world does not end beforehand. +It will be sung by all the artists at all the three concerts at the same time. +The anthem will also be featured in a unique live broadcast on Czech Television. +The words were written and the role of Jesus in the video clip was played by Tomas Hanak, Xindl X also sings in it... +How did you end up working with them? +We collaborate also with other personalities of the Czech cultural scene, due to organising a lot of fund-raising events and concerts... +We try to really get them involved in these projects. +It turns out that most of them are interested and enjoy working with us. +What will the proceeds from the concert against the end of the world go to? +Equipping the wheelchair-accessible educational Studeo centre, which is already in its sixth year, in collaboration with the citizens association Tap from the Jedlicka Institute for the disabled. +Tutors come in regularly to spend time with the Jedlicka Institute's students and run activities, which they enjoy and interest them. +The students themselves do not have the funds to afford tutors, so we try to provide this for them in this way. +Within the construction project at the Jedlicka Institute, a separate building is planned, which we can move into with this project. +Every concert sees the appearance of several bands and artists. +How do you select them? +We have tried to compile a programme, which speaks for all ages, including children. +For example, in Prague, Chinaski, Support Lesbiens, Illustratosphere with Dan Barta, The Tap Tap, Marian Bango and Jiri Suchy will appear. 
+Further details can be found at www.kpks.cz. +Are you planning any more "bombastic events" in the future? +In May, we will be making our first appearance in the Prague Spring, so we will definitely be preparing a good line-up with some interesting guests. +Next year, we would like to play at the Czech National House in New York, and I personally - since we will be in the USA - would like to build in appearances in Washington and Chicago. +Your international plans are not modest; you have already performed, for instance, in Madrid, Brussels, London, and Moscow. +The Tap Tap is nonetheless a band composed of handicapped people. +How do you cope with these journeys in terms of logistics and organisation? +It is not as scary as it might seem at first. +We have five members in electric wheelchairs, which must be transported in the luggage area; we must also, of course, carry around with us a lot of luggage and instrument cases... +Nevertheless, we have so far managed it without any problems, CSA and British Airways were well prepared for us, so much so that, on occasion, I was quite surprised. +Even in Moscow, which we have just returned from, it all went smoothly. +Thanks to these international trips, you will have had a chance to compare specific accessibility issues, public attitudes to disability and so on. +What have been your experiences so far? +After Madrid, Luxembourg, London and other places, where everything functions better than here, we have just witnessed that in the East everything is still in its beginnings. +Compared to Prague, Moscow is rather inaccessible; it still remains unusual there for a person in an electric wheelchair to be travelling around the city centre on his or her own. +Obvious things, such as giving wheelchairs priority in lifts, are not commonplace there. +Fortunately, citizens associations are emerging there too that are trying to draw attention to the problems faced by people with disabilities. +And on the other hand, where do we still lag behind more advanced countries? +There are a lot of things, which we still lag behind on... +It is important to mention that improvements to the current situation always depend on the efforts of the people who are affected. +In London and Madrid it is completely natural for people with serious handicaps to be independently out in public, and they can use the toilets, go to the museum, or wherever... +It is less common there for large groups of people with disabilities to actively take part in social life, in this respect with The Tap Tap we are a step ahead! +Public respect or accessibility is one thing, but it is only when we can become famous athletes, artists, actors, politicians, or lawyers that things will really begin to change. +So far there are only exceptional cases, people who are strong-willed. +The Tap Tap band is currently very popular, but let us look back a few years, what prompted you in 1998 to form it? +I began my job as a tutor at the Jedlicka Institute, where I was surrounded by a lot of young people, who were interested in doing something. +Since I am a musician myself - among others I play the saxophone - I started a music club with a colleague. +With time, as our moderator Ladya Angelovic says, it has grown a little out of our control (laugh). +Your popularity has only come about in the last few years, or am I mistaken? +It is true that we have been helped by creating ties to famous singers and also by our proactive work on promoting the band. 
+We realised that work, which goes on unseen can be like it never existed. +Thanks to funds from the European Union we can even afford top quality tutors, equipment and so on. +Was it your goal to take The Tap Tap to such heights? +From the outset, I felt there was potential to do things a little differently. +Show business is filled with things, where one imitates the other. +It is logical in its own way; all new things are taken in hesitantly and take a long time. +Things, which are unique, are few and far between, but I would dare to claim that Tap Tap is one of those things. +A person's first impression on seeing you is, of course, pity - it is a natural reaction... +But that pity is simply wasted, because handicapped people are not abandoned and suffering beings, who need to be pitied. +They are people, who can fully live life and blossom, assuming, of course, that they have the right environment for it. +I say that when a person with a handicap succeeds in something, it is not just progress for them but for society as a whole. +Has your success also been helped by your firm hand as a leader, as many people are suggesting? +If we want to achieve top class work, we must be uncompromising in many things and require a certain level of discipline. +I think this is to be expected. +Some people come to us with a romantic idea and their head in the clouds, and when they find out they have to go to rehearsals twice a week, attend practice sessions and put up with a lot of time travelling to concerts, their enthusiasm quickly disappears. +That is how it works everywhere, with every group that wants to work and wants to achieve something. +The Tap Tap band currently has twenty members. +How many of those were present at the beginning in 1998? +Only one, Ladya Angelovic. +We are an open group, people come and people go, this is unavoidable. +Those who have the interest and the drive will always find our door open. +The event takes place the day before the end of the world is expected, on Thursday 20.12.2012 from 9pm. +The venues will be Praha Incheba, Brno Fleda, and Ostrava Plynojem with performances from 12 bands and other musicians from the Czech Republic. +All three cities will be joined by a televised link-up at the evening's close for a united rendition of The Tap Tap's anthem "The End of the World is cancelled" +The concert's goal is to raise funds to equip the STUDEO multi-functional wheel-chair accessible learning centre at the Jedlicka Institute in Prague in the sum of 25 million Czech crowns. +Admission fee to the concert is 400 CZK, children under 12 years of age go free, tickets on sale from Bohemiaticket. +Poland and the Cosmos. +Last week the council of ministers of the European Space Agency admitted Poland as the twentieth member of the agency, being the second nation from the former Eastern Block (after the Czech Republic, which became a fully fledged member of the ESA on the 12th of November 2008). +Poland began close cooperation with the ESA in 1994, and in the following years it has participated in a series of agency projects. +Of course, Poland's path to the space had begun much earlier. +Polish boffins devoted their time to space flight even before the Second World War, but were not always met with understanding. +I look back, for instance, to the lecture of A Sternfeld in Warsaw's astronomy observatory, who, on the 6th of December 1933, presented ideas on his pioneering work Entry into space. 
+The thoughts of the young engineer (born 1905) left his audience cold, and years later Sternfeld remembered that only Dr. Jan Gadomski had shown an interest in his work. +In 1934, for his work Entry into space, Sternfeld received the Robert Esnault-Pelterie and Andre Louis Hirsch prize in France. +The above mentioned Dr. Jan Gadomski (1899 - 1966) later became a strong promoter of astronomy and astronautics. +He published hundreds of articles in Polish journals, and wrote a series of books on these scientific subjects. +Gadomski became a world-known promoter of astronautics and his contribution was, notably, recognised when a crater on the far side of the Moon was named after him. +In 1925, Poland had already built a handcar which was supposed to be fitted with a rocket engine. +Unfortunately, both the project's designer, and the project's details, are unknown. +It is not even clear, whether the rocket was intended to start the handcar or to slow it down. +Information about this rail track is only known from press articles of the time. +In 1933 the Polish artillery started their engagement in flying bombs. +The research was undertaken by the Weapons Technology Division in collaboration with Prof. Mieczyslaw Wolfke and Prof. Gustaw Mokrzycki. +From the documents, it is clear that the research reached the stage of practical tests. +Of course, the advance of the German army interrupted the research. +In 1937, the concept of a photoelectric homing rocket designed by engineer Rohozinski appeared in the trade press, and in the following year The Rocket - air torpedo and flying rocket-bomb appeared, authored by Leliwy-Krywoblocki. +Both projects were destined for military use of rocket engines. +Immediately prior to the War, all projects for military use of rocket technologies were overseen by the Provisional Scientific Advisory Board (Tymczasowy Komitet Doradczo-Naukowy) that coordinated all the work. +The Board was appointed in 1937, but after two years of activity their operations were ended by the start of the War. +Further work devoted to astronautics appeared in the Polish Press after the War thanks to the Polish Astronautics Company (Polskie Towarzystwo Astronautyczne). +The first reference to the company figures in the November issue of the magazine Problems in 1954, in which four in-depth articles are on the subject of astronautics. +In one of these, by Prof. Subotowicz, the establishment of a company is proposed, which would dedicate itself to astronautics. +At the time, there were already projects underway for artificial satellites and it was clear that cosmic research was an emerging sector. +From the beginning of 1956, the Polish Astronautics Company (PTA) sought entry to the International Astronautics Federation (est. 1951) and by autumn the PTA was already a full member. +In the following year, the PTA's first chairman, Kazimierz Zarankiewicz (1902 - 1959) was appointed Deputy Chairman for the International Astronautics Federation. +He served in this capacity until his death in 1959. +From 1956, the PTA played a significant role in the successful development of meteorological rockets RM (Rakieta Meteorologiczna), which became the first Polish rocket to enable scientific research. +The first RM-1 model was completed in 1957 and the first launch took place on the 10th of October 1958. +The rocket, with a ceiling of 1800 metres, measured around 80 cm in length and weighed a little under 5 kg. 
+Later, the improved RM-1A version was constructed and in the summer of 1959 launch tests were initiated for the two-stage RM-2 rocket in the Bledowsky Desert. +The rocket was 1.4 metres in length and weighed approximately 11.5 kg. +A further development model was designed for real scientific work - the RM-34 rocket was to reach 14.5 km and be tasked with monitoring high altitude winds. +Of course, in 1962 further research was stopped. +The successor to the RM rocket type was the Meteor-1 rocket, developed from 1962 to 1965. +The rocket was designed as a two-stage rocket, with a total length of 510 cm and a launch weight of 32.5 kg. +Three models were developed (designated Meteor-1A, -1B, and -1C), which differed in the room available for scientific apparatus. +In the Meteor-1A rocket, a space of 0.4 litres was available, Meteor-1B had 0.34 litres, and Meteor-1C had 0.62 litres. +The maximum altitude for all three models was 37km. +Between 1965 and 1968, the development of Meteor-2 was underway in the Aeronautics Institute, with its first launch tests in October 1970. +The Meteor-2 rocket had a launch weight of 380 kg, and was capable of lifting a useful load of 10 kg to a height of around 60km. +Subsequently built models were the Meteor-2H and Meteor-3. +Poland's admission to COSPAR (Committee for Space Research) in 1960 should be mentioned, as well as the appointment of a national COSPAR board two years later. +Poland also participated in the Interkosmos space programme for space research on Soviet artificial satellites, and in 1978, the Polish pilot Miroslaw Hermaszewski became the second intercosmonaut after Vladimir Remkov. +Abolishing the legislation on public works is not the solution. +Last week the Constitutional Court abolished the law on public works. +The resolution caused lively public debate. +It will certainly be interesting to look at this issue from a broader perspective. +Liberally oriented financial systems in the EU, just as those in the globalised world, are based on the principle of an unregulated economic competition. +Its effect means that individual financial entities and national economic systems are in a state of permanent conflict among themselves. +The cause is the principle of free trade and free, completely unregulated movement of private capital together with uncontrolled financial speculation. +Due to significant labour cost differences (salaries) there is pressure on prices. +On this basis, it should be understood that when a supplier tries to compete in a commercial tender by importing cheap goods, "the rug is pulled" from under the competition's prices to capture a greater market share and, in this way, increase its own profits. +On a wider scale, this means most businesses must move production abroad, import cheaply from abroad, or close down. The result is high unemployment in countries where labour costs are high compared to other economies. +Since private capital is not bound by social responsibility, and therefore also not by the unemployment it causes, the social costs born by the state must necessarily increase. +The whole situation is bolstered by the businessman's complete unwillingness to pay taxes, which would alleviate the economical and social harm caused in the pursuit of profit. +The situation is so well known that there is no need for actual statistical data. 
+The ruthless private capital practices create particular economic situations, where the State in these countries is forced to enter in the mutual competition, aiming to artificially lower the social standard of its own citizens in order to attract foreign investment. +In other words, governments stake their own citizens because of private capital while disregarding the drop in social standards. +This occurs chiefly in amendments to existing law. +The aim is to economically force the domestic population to accept prices dictated by private capital, especially in terms of salaries. +On one hand, this economic system of force, in case of long-term unemployment, on the other, restricted employee rights in the workplace. +This yields growing poverty and an increasing void between the poor and the rich. +In Germany there are already a host of food hand-out centres for the poor, who are not able to feed themselves on their own wages. +The number of these people is already in the millions. +In the name of improving the competitiveness of the German economy, it commonly occurs that properly employed people receive such a salary that the State needs to top it up to the minimum wage. +Just such a scandal was revealed in the case of auxiliary staff in the Bundestag. +The austerity measures for all the southern EU states will undoubtedly lead to the same situation, where people are pressured by a catastrophic drop in living standards to emigrate as it was in the 19th century, or to eke out an existence on starvation wages on the edge of society, in the hope that the country will eventually see some foreign investment. +At this point we have to ask where this may come from? +If it is to come from other EU states, then poverty is being shifted from one country to another, or it will not come at all, because Chinese, Indian, Brazilian, Turkish, Moroccan, Egyptian, and African labour is still at a fraction of European wages. +This applies to all of Latin America. +Liberal theory and the Media incessantly claim that the State may not participate with capital in its own economy, and that a controlled economy leads to economic ruin. +Private capital cruelly insists on the viewpoint that the State must not intervene in the economy. +Thereupon, we should ask ourselves whether private capital has no influence, or whether it actually leads politics and thereby the whole country, for its own selfish ends. +Here, the answer must be yes. +The proof is the existence of the almost omnipotent, and in all states, omnipresent lobby. +The result is a desperate situation manifesting itself through corruption, through mutual benefits and legislation, where almost everything is criminal, but nothing is punishable. +In Germany the situation is such that state ministries, through lack of financial resources, contract out the drafting of laws to private law firms, who are basically connected with industry. +These laws are then approved in the Bundestag. +Real power does not come from the people as the Western-style constitutions claim, but from strong financial organisations that look after their own interests. +It is clear that liberally-orientated democracies will now quickly reach a situation, as is described by Appian in his work on the Roman Republic Crisis in the time of Cesar and Pompei: "The State was already long in complete degeneration and its offices taken by force. +With bribery, illegal acquisition of benefits, and with stones or swords. 
+Bribery and corruption were rife and unhindered, and the people would vote for a result which had been bought" ..."people with principles did not run for office, so on one occasion the whole debacle meant the state went eight months without consuls.." .."There was actually talk about the only answer to this terrible situation being autocracy, and an energetic man should be elected." Appian had Pompei in mind, but it was Cesar who changed democracy for autocracy permanently. +The conclusion, just as in antiquity, the current society is built on unscrupulous tendencies for personal gain without regard to the interests of society as a whole. +Private capital in its present state is not able to understand the interests of society as a whole. +The outcome is now, as it was then, an unprecedented decadence of the elite with no attempts whatsoever on deeper reaching reforms. +The causality of the rise of the fascist and communist regimes should therefore be sought in the misguided liberalisation of the economic system in the 19th and 20th centuries. +The current state of affairs, when we consider the demise of those systems in favour of liberalised democracy as an interlude, can expect its next cycle. +The particularly catastrophic reality is that the current elite is completely ignoring the potential lost of hundreds of thousands of lives, humanitarian and social disasters, which we are already witnessing, as well as crimes against humanity, as we are familiar with from ancient and modern history. +The abolition of the law on public works is not the answer, at least not in the long term. +Under the pressure of economic competition, internationally as well as within Europe, the Government of the Czech Republic will be forced to pursue ways of lowering the population's living standards. +This pattern is thus systemic. +To address this, there are targeted political and social reforms, which strengthen the state's capital participation in the economy, increase the people's influence over the state and weaken the monopoly held by private capital over society in favour of the state. +Israel: Chaos Lab. +"Nothing comes from violence and nothing ever could," from Sting's Fragile, where one of the main verses from the refrain is "Lest we forget how fragile we are." +"If my sons did not want war, there would be none," said the dying Gutle Schnapper, wife of Mayer Amschel Rothschild in 1849. +The latest wave of violence between Israel and the Gaza strip, as always, has sparked a lot of reaction. +Some stand by Israel, arguing it has the right to self-defence, and Palestinians are portrayed as terrorists, while others support the Palestinians, claiming racism by the Israeli state, claiming that genocide is being committed against Palestinian Arabs, and that Israel is a terrorist state. +I do not want to dwell, in these repeated periodic waves of killing, on who is the transgressor and who is the victim, after all, today's inhabitants of Israel, including the self-governing territories, were born into the current political situation, and did not live through the start of the violence. +I would like to offer the readers a peek behind the scenes, a look at whom, most of all, this 95-year long tension is serving (starting from Balfour's declaration in November 1917) on this small piece of land in the Middle East. 
+Some of my thoughts are supported by available historical facts, while others are derived from my own understanding of who, that is, which group of people is the main source of events in modern history. +Human history is in the first instance about the struggle for power. +In every era we can find an Alexander the Great or a Napoleon. +What is not quite so apparent is whether these were the people, who had chosen their path independently, or whether behind their throne stood someone who directed their actions towards a pre-calculated goal. +We must accept that we live in a time when the world's wealth is concentrated into the hands of a few individuals, and that this concentration of wealth and the power it exudes could not happen in one generation's lifespan. +Among these astronomically rich families, one stands out, which could be considered the puppet master (whether someone else stands above them, I am unsure, but I would not rule it out) - the Rothschilds. +Not much is written about them. +Understandably. +The first news agency (Reuters) they bought in the 90's of the 19th century, in order to prevent their name being connected with acts of high criminality, which appeared in their background and which always meant securing power, increasing wealth, or both. +They hold majority stakes in almost every central bank in the world, and against the countries, where they do not hold a stake, they are either waging or preparing for war (before the assault on Afghanistan it was 7 countries, after Iraq it was 5, after the overthrow of Kaddafi 4 remained, but in the meantime Russia submitted its central bank to the Russian Government). +Whoever attempted to defy this family died. +Abraham Lincoln refused to renew the status of the central bank to the Rothschild Bank of America, and during the Civil War he began to issue his own (that is state-issued) money and was assassinated in 1865 at the theatre. +JFK began issuing his own money and wanted to close the Fed (Federal Reserve), and was killed in 1963, Congressman Louis McFadden was poisoned in 1936, after he had intended to sue the Fed for causing the Great Depression of 1929. +Their thirst for global power led in the years of 1859 - 1871 to the formulation of a three-world-war plan by the freemason leader of the 33rd degree, Albert Pike. +The first war was to remove the large monarchic state bodies in Europe, the second was to remove colonial rule, especially from Great Britain, and the third will reduce the world's population down to 0.5 - 1 billion people (this number of slaves will suffice for their comfort and luxury, and will not use up so many resources), the creation of one universal faith (ecumenism is just an appetiser for this solution), and finally the seizing of absolute power. +The method, which the group of wealthy families with the Rothschilds leading the way, employ is the instigation of crises, followed by the offering of a solution (order ab chao - order from chaos). +These solutions are false, however, and always lead to a worse situation (vide establishment of the Fed, so that the crisis of 1907 would not be repeated). +Thus, having succeeded in assassinating Ferdinand, the Habsburg heir to the Austro-Hungarian throne, in Sarajevo thereby unleashing World War I, they destroyed tsarist Russia with the Bolshevik revolution. 
+The First World War ended abruptly, militarily and economically unsubstantiated, with German capitulation (the war was no longer needed to destroy tsarist Russia) and the central European powers of Austria-Hungary were subsequently dismantled. +To facilitate the inception of the Second World War, they allowed bankers and politicians to create a latent conflict situation by saddling Germany with huge war reparations, thereby making a radicalist example of the impoverished masses, it remained only to introduce a sufficiently convincing culprit and a leader with a simple solution, while also creating a multi-racial Czechoslovakia with a strong German minority to play, and indeed did, the role of a fifth colony, once the war had been ignited. +At the end of the 19th Century, the Rothschilds instigated the establishment of the Zionist movement, one branch of which strove to form the Jewish State, seeking out an area of historic Judea, Jerusalem, to make its capital (the Return to Zion). +The aforementioned Balfour Declaration formed the basis for the mass immigration of Jews to Palestine, where the first conflicts began with the local Arab population. +Terrorist attacks occurred on both sides. +World War II broke out, and whether Hitler broke free from the leash, which international bankers were holding him on, or whether his actions were all part of the plan, is difficult to determine, nevertheless the suffering of European Jews in the concentration camps created the foundation to the world's acceptance of the Jewish State. +Israel was officially formed in 1948, and just as the war reparations for World War II were layed on Germany, the announcement of the State of Israel became the third war's hotbed. +Provided the international bankers succeed, the Jewish Nation, as with the second, will be the victims on the front line, now together with the Arabic - or more generally, Muslim - population of the Middle East. +Israel is like a huge laboratory, a source of discord and chaos not only within the country, but on an international level (just look at how strongly people are split into supporters and opponents of Israel). +Who is the wrong-doer and who is the victim in the Palestine-Israel conflict, where injustice breeds injustice in an endless cycle of violence, while everything began from the greed of a few and their lust for global power? +Here, we must differentiate between Israel's general population and their leaders, because, just as it happens here, the international bankers introduce their own selection of candidates for people to vote for. +Israel's current prime minister, Netanyahu 'the hawk', is a typical example of a fascist politician, loyal to the international bankers, who does everything to instigate war with Iran, which would, due to its membership in the Shanghai Cooperation Organisation (China, India, Russia, Pakistan, ...) lead to a greater threat of global conflict, and through its control of the Hormuz Strait, where 20% of the world's oil must sail (the channel is only 2 miles wide), to the destruction of the world's economy. +In what light stand the words, spoken by David Rockefeller in 1994: "All we need is a major crisis and the nations will accept the New World Order." +The New World Order in their eyes is one of master and slave. +A world where the rest of the human population serve the luxury of a handful of financial aristocrats. +A world, where each new-born is implanted with a chip, which makes their existence completely subjugated. 
+"He also forced everyone, small and great, rich and poor, free and slave, to receive a mark on his right hand or on his forehead, so that no one could buy or sell unless he had the mark, which is the name of the beast or the number of his name. +If anyone has insight, let him calculate the number of the beast, +for it is man's number. His number is six hundred and sixty six." +Argo: When things are at their worst - call Hollywood. +In November 1979, a mob of Islamic student demonstrators took over the American embassy in Tehran and held 52 diplomats hostage. +They were to be released in exchange for the overthrown Shah Mohammad Reza Pahlavi, who fled after the revolution to the USA, which had actually supported his regime for several decades. +For the American administration the situation did not offer a positive solution - it could not throw the Shah overboard, because this would seriously jeopardise the trust of other allied countries. +The release of the hostages in Iran, where the revolution resulted in the establishment of the theocratic regime, could not be achieved. +This was a blow to the prestige of the United States, which was later compounded by the fiasco of attempting to free the hostages by force. +The incarcerated diplomats were finally released after 444 days, following negotiations mediated by the Algerian government. +Their ordeal provoked a wave of solidarity and anti-Iranian feelings at home. +The debacle in Iran significantly influenced Jimmy Carter's loss with Ronald Reagan in the 1980 presidential elections. +The film Argo, directed by the actor Ben Affleck, recounts one episode in this story, which brought America a small victory. +Just before the embassy was seized, six employees escaped. +After some peripeteia, they ended up in the Canadian ambassador's residence. +The CIA, in collaboration with the Canadian authorities, succeeded in getting them out of Iran, helped by an extravagant cover story - they left on Canadian passports as members of a film crew, who were surveying locations for a sci-fi blockbuster. +A combination of genres +The plan, conceived by "exfiltration" expert, Tony Mendez, required the assistance of Hollywood. +For the story to be believed, the film project was reported on in specialist magazines, press conferences were organised, and the fictitious production company had a real office. +The details of the operation were, for a long time, kept secret; the film draws on the memories of Tony Mendez. +Affleck's film is a peculiar mix of genres. +On one hand, there is a realistic incisive political thriller, and at the same time a "caper movie," with small victories and double-crossing - another example would be Ocean's Eleven. +The mood alternates in the film - on one side, sharp documentary-style sequences in Tehran (the title sequence shows iconic photos from news of the time, relating to the same events portrayed in the film - there are no big differences). +On the other hand, lighter sections from Hollywood, laced with irony and a little exaggeration. +Then there are scenes from the CIA headquarters and other agencies - men in suits debating the situation around meeting tables, in office corridors, over the phone... +Ben Affleck has managed to restart his career in extraordinary style. +The derided actor has become a respected director, and his acting is no longer the target of ironic comments. +Argo is his third big-screen movie, following his dark crime movie Gone Baby Gone (2007) and the thriller The Town (2010). 
+It is also Affleck's first picture, which does not take place in the director's hometown of Boston. +The atmospheric feel in different locations is one of the characteristics, which took his earlier films above Hollywood standards. +Affleck shows it in Argo, where Tehran is "featured" by Canada. +The best scenes of the film take place in the streets, in the reconstruction of real events - the opening sequence of the siege on the embassy is impressively lucid, creating at once feelings of confusion and surprise, which come flooding in, as history suddenly takes a turn. +A similar effect is achieved by Affleck and his team in the fictitious scenes (the fake staff at the Tehran bazaar). +Too much action in too many places +The director had to tackle the issue that the story being told does not offer many nail-biting scenes for the film. +What little there is, is worked well, with some occasional embellishments to reality - these do not all come off so elegantly (the scene, where a looming crisis is averted at Tehran airport by a phone call in America, followed by a chase on the runway seems quite far-fetched). +Argo's weakness is its divergence, which comes from the need to show too many events in too many places. +Alan Arkin and John Goodman play their roles as the Hollywood assistants with great charm; their characters deserve more room, and are not by far the only ones in this film. +Affleck's film loses the dramatic pull a little, it is a film, which can be watched with reasonable interest, its production and retro-style are evocative of thrillers from the 70's. +It does not really captivate. +As a reminder of history's particular ways and a testimony to how exaggerated the views are on the all-powerful all-controlling secret services, this will do. +Rules for blowing up balloons, for bananas and a circus +The www.bankovnipoplatky.com server, which issues a poll every year on the most absurd bank charge, has now decided to announce a competition for "the most absurd regulation or proposal from the EU." +"We were prompted by the latest story, where the EU plans to take on a 40 percent quota of women at management level of Europe's largest companies," Patrik Nacher, the poll's organiser, told Pravo. +Among the latest nominated absurdities, for instance, is the recent decision by the European Court to unify insurance premiums for men and women. +Until now, women were favoured in life insurance prices, because they constitute a lower risk for insurers. +"Other unbelievable ideas from the EU can be nominated by anyone until the end of the year. +The actual voting will then take place until the end of February 2013," informed Nacher. +Among the controversial EU regulations, we might include the mandatory addition of bio-ingredients to fuel, which consequently harms the environment, the ban on reliable mercury thermometers just because they contain a relatively small quantity of a toxic substance, or the rules on the size of chicken cages, which significantly raised egg prices this year. +The ban on the use of the term "spreadable butter" and the withdrawal of classic light bulbs from sale had previously come under criticism. +First rate bananas are to measure 14 centimetres +The Union's machine often makes decisions under pressure from this or that commercial or industrial lobbying group, whose demands in Brussels are usually defended by state or group of states' interests (just as the Czech Republic is promoting the demands of its banks under threat of being vetoed). 
+The lobby's interests were, for example, that bananas of the highest quality should measure at least 14 cm in the EU, and were not to display and "anomalous curvature." +The European Commission defended itself, saying that it was only harmonising existing disjointed national standards, which complicated trading. +Norms relating to fruit and vegetables have already been softened by the EU despite opposition from certain states, referring to the food waste caused by the existing directives. +One possible prize-winner in the poll may be the last year's EU regulation according to which inflatable balloons must be sold with a warning that children under 8 years of age may not inflate them without parental supervision. +Here, the EU pointed to an American research, which indicated that, among other toys, balloons are one of the main causes of child suffocation. +A similar restriction now applies to children under 14 years of age using party blowers. +Strange ideas are conceived at home too +Fairly absurd is the rule relating to individual European officials - everyone in the EU, who holds an official post, may not use the term Macedonia due to it being a sensitive topic for Greece, and instead the acronym FYROM (Former Yugoslav Republic of Macedonia) should be used. +The Bankovnipoplatky.com server in collaboration with the Liberal Economist Association, Laissez Faire, also nominated, aside from the aforementioned absurdities, for example the Union's regulation on the volume of food provision stocks held in an EU member state. +The EU stipulated the maximum volumes of food provisions, which may be present within the CR on the day of our entry to the Union. +The Czech Republic thereafter exceeded, for instance, the permitted volume of mushroom preserves, which incurred a high penalty. +The poll's organisers were also impressed by the idea of paying certain countries because they do not have a coastline, or the suggestion of allocating funding for a request for funding. +These ideas did not come from Brussels, however, but from Prague. +"We are handicapped because we do not have the sea. +We are asking the European Union for a refund," declared the minister for agriculture, back in autumn 2004, Jaroslav Palas (CSSD). +His argument was that there had been a good harvest of cereals, and due to the so-called buy-out interventions, the state's warehouses were full and were forced to export. +The Czech Republic is further away from a port, so according to Palas the EU should be paying us hundreds of millions of Euros. +The European Commission finally met the CR halfway by organising a tender for the purchase of cereals from countries that do not have access to the sea. +Funding to subsidise funding requests was offered to foreigners by the Ministry for Regional Development's minister, Pavel Nemec (US-DEU), specifically this was meant for making requests for funding from Brussels. +EU: Bizarre legislation is the exception +Regulations may well become the target of criticism among member states, but the EU's efforts at regulation, more effective operation, and development of the entire Union deserve recognition, according to a number of experts. +A more important issue, according to experts, is the drawing of EU funds on projects, which have hardly anything in common with strengthening the European integration, but which was pushed through by member states during a budget meeting. 
+Emotions flare among Czechs when, just as other countries in the Union, the CR must fight in Brussels for the right to particular labelling on its traditional products, in which it does not always succeed. +The Czechs fought for six years with the Germans and Austrians to protect the labelling of their Olomoucke tvaruzky, however the tuzemsky rum, whose tradition reaches back to the 19th century here, had to be renamed tuzemak by the manufacturers. +The appellation of rum can only be given to products distilled from cane sugar, and not sugar beet. +Carlsbad wafers, Pohorelicky and Trebonsky carp, and Zatec hops have been added to the official list of registered products of the EU, alongside the world-renowned feta cheese and gorgonzola, German marzipan from Lubeck, and Parma ham. +The EU's stamp of protection can also be proudly shown on Pardubice gingerbread and Horicky tubes. +People want me to save the republic, but I am an amateur, says Okamura +Senator, how does a person decide they want to run for President? +This is not about me being a senator or president. +If everything in our country worked without problems, then I would not be running for any post. +I cannot watch any longer the country having been robbed over the past twenty years, thieves roaming about there and people's taxes and retirement age increasing. +I had no ambition to be a politician. +When I see something I do not like, though, I try to find a solution to change things. +Since I have already turned forty, and I am an independent non-party man, I have no other choice to influence things but to stand for senator or president. +You have already reached the Senate, but shortly after that you are taking off for the Castle. +Are you not turning your back on those who voted for you in doing this? +I have been saying the entire time that I would fight for the Castle based on the results in the Senate's elections. +Later, I added that if I were elected as senator, I would be standing for president. +My goal, though, is not the post, the post is a tool to allow my vision to be realised. +Therefore, I need the greatest influence, and the strongest mandate possible. +The trouble is not just that as a nation we swear in the pub or at the television, but that we trample anyone, who wants to try to change things. +The Media add to this, misleading the public, and mistaking freedom of speech with freedom to lie. +For example, I was allegedly bribing reporters, or I was allegedly an advisor of Jiri Paroubek. +Let's talk about your vision. +You set out on your castle siege with a thesis on the material and criminal responsibilities of politics, and a retroactive financial disclosure of assets over twenty million. +You need to change the law for this. +As president, though, you do not have this power, and only the Senate as a whole may propose laws. +How are you going to solve this? +When I lobbied, as a citizen, for tour guide work to be a free trade, it was successfully carried through. +The problem is political squabbling - when someone comes with a good idea from the left or the right, it will be deliberately rejected, causing delays for the public. +As an independent non-party man, I stand a far better chance of gaining support from all parliamentary sides. +The advantage I hold is that without the political pigeonholing or dogmas I can take what is best for our country from any side, and apply it. +Do you see yourself as a person from the right, or the left? +From the Czech viewpoint, it seems they tend to put me to the left. 
+For me, it just does not matter if it is a little to the left or right. +The important part for me is moving forward. +It is not about whether someone is from the left or right, I just want to bring people together. +I always support any good public solutions, even if they are put forward by the KSCM or the ODS, and, in the same way, I will oppose bad ideas. +You get angry when someone calls you a populist. +Are you not confirming this with what you have stated? +When you make a company business plan, you also have some ideal goal and vision. +You try to come close to it. +Some may call it populism, but all the proposals I speak about are already working elsewhere, or they have been put forward by experts. +But without the support of the Parliament you will be left with just slogans. +You will not last long in politics with that. +Or do you believe that if you walk among the public and talk with them, that you will succeed, say, in passing criminal and material responsibility? +I have no alternative. +I need to convince politicians, reporters, and the public, and try to get them on my side, so we can put this through. +If I were elected president, it would not be a problem to arrange a live television broadcast, where I ask the leaders of the parliamentary parties to pass a law on material and criminal responsibility for politicians, civil servants, judges, and the Attorney General. +And, as the case may be, they would need to explain why they did not want this. +When there is a strong figure to point out the issues, it just needs some pressure on the political scene. +Take for instance the direct election of the president, it was achieved thanks to public pressure. +I will say frankly that I am an amateur, I am not a genius or an intellectual. +I am looking for allies to share my opinions and vision. +I have just started out in politics, and I am looking for a majority support for my agenda. +I will try to make things progress, but it if does not work out, in six years I will finish and return to the private sector. +It sounds a little like Okamura is trying to save the Czech Republic. +I am no saviour. +I know that alone I will not achieve anything, so I have asked acquaintances, whether they would run for the senate. +I went to Radim Jancura, who declined due to his workload. +So I, at least, support investigative journalist, Jana Lorencova, who uncovered fraudulent activity with light heating oil. +I put myself forward, because people are really discontented, but now I have my doubts. +Sixty percent of people did not go to vote, and those who did mostly voted for leaders of the establishment. +In the senate, there are only two independents, including me. +People have voted for a senate that will make it difficult to enforce changes. +Nonetheless, I will fight for my vision, for example, for the direct election of mayors or regional council presidents. +Are you considering having your own party? +I have not considered it yet, because I have neither the time to verify that every party member has a clean background, nor the money to do it. +I have no money even for a presidential campaign, my transparent account holds just 20 thousand. +You have no money? +You are talking about financial disclosures, but what is yours like? +I estimate my private assets to be around 60 million. 
+In Prague, I have land worth around 25 million, an apartment worth ten million, another apartment worth eight million, an artwork collection worth around ten million, an Aston Martin worth 3.5 million, a Skoda Superb worth a million, and I have a few million in my account. +I have the Aston Martin, by the way, because it was my dream as a boy - I always liked James Bond, who drove the car, was gallant with women and also fought against evil and villainy. +You drive an Aston Martin, have assets worth 60 million, but you have no money for a campaign? +You say you want to change the Republic, but you are not keen on putting your own money into it. +This does not inspire much confidence. +I do not have 15 million for a campaign. +Should I take out a loan? +I have already put 2.5 million into the campaign. +The fact that I do not have any sponsors is evidence that there is no real interest in my programme. +I have no obligation to pay for my own campaign. +The expenditure on my campaign is basically covered by the pay I will be receiving as a senator. +However, I would not be able to live on it, for instance, I could not pay for my son's English school, which costs 30 thousand a month. +If I were only interested in making money, I would not be standing for election. +So you will still be in business so that you can make a living? +Did you not say you would be putting this on hold? +This depends on the rate of pay. +As I promised, my activities have been partially reduced. +For example, my deputy is taking over as the CEO of the travel agency in spring. +People would like me to be a Samaritan, who saves the Republic. +But I must also live off something. +As a businessman, what would you usually make monthly? +Two hundred to 400 thousand, which I still do. +And if I became president, then I would end my business activity. +The full interview can be read in Saturday's issue of Pravo. +The MVRDV architects prove that true adventures are not just in the head - drawing on the example of Spijkenisse and the recently erected Bücherberg (literally "book mountain") - 2 photos +"I think the building is fun, looks futuristic and ultimately provides something interesting to look at," said Lisette Verhaig, a passer-by at the road-side. +And Stefan Spermon, IT technician in a major firm based nearby commented: "It's definitely a thing of beauty, the building." +However, I do wonder why people would need another library in this day and age. +Everyone has the Internet, an iPad and eBooks. +No-one goes into one of these old-style libraries voluntarily nowadays, or am I wrong? +Spijkenisse, a sleepy town outside the gates of Rotterdam, which barely merits a visit, is a special record-holder. +The 80,000-resident municipality has the lowest literacy rate in the whole of the Netherlands. +In order to counteract this asinine situation, the decision was made a number of years ago to make a contribution towards general education and to recreate the seven fictitious bridges that feature on the Euro notes as pretty, painted reinforced concrete miniatures. +The success of the education offensive was limited. +And so the city fathers acknowledged that there was only one way to become master over the statistics: a library had to be built! 
+Winy Maas of the Rotterdam-based architectural firm MVRDV, master of audacious bar charts and producer of humorous and often cynical buildings, took the project on with his customary composure, and turned up at the competitive hearing in 2003 with five books under his arm and a grin on his face. +And with the judging panel still looking at him with bewilderment, shrugging their shoulders, the impertinent Maas stacked his chosen props by order of size to form a pyramid and rounded off his presentation - now suitably backed up with action - with the words: "Dear Municipality!" +So this is my suggestion for the Spijkenisse Book Mountain - for the so-called Boekenberg! +Nine years later, the 30-million-euro mountain has been lifted up. +It is part of a revitalisation project, which also includes an underground car park, a supermarket, a post office and a small number of adjacent apartment buildings and terraced houses, with a total of 50 dwellings. +At the beginning of November, the Bücherberg was awarded second place in the "Best Library of NL 2012" competition. +In addition, the project is also nominated for the Dutch National Wood Award 2012. +Thus, the faceless small-town retort, that until now had nothing more to offer than a post-modern pedestrian area and a stunningly ugly town hall, behind whose white facades one would expect to find a dairy plant, has been bolstered by a piece of contemporary architecture. +First and foremost, however, Spijkenisse now has its first public cultural building in the history of its existence. +The long journey to the book +The first impression: the Eldorado of books beneath a cheese dome. +There is in fact a lift that climbs through the centre of the mountain massif, however, the true joys of space and literature are revealed when scaling the topography on foot. +The interior space, glazed throughout, is bright and open, the fired clinker floors and the elegant street lamps speak the unmistakable language of a public town square. +The urban ambiance is perfect. +You are already on the lookout for a park bench, a dog, and boys and girls playing football. +And everywhere there are books, books, books. +"Normally book shelves run along the facade, and in the centre there is a large, dark space, which is usually unpleasant and impersonal," says Winy Maas. +We turned the classical spatial configuration on its head and turned the reading area inside out. +The interior of the Bücherberg is cleverly used: in the centre there are offices, an Internet library, a chess club, an environmental centre and the central technical support room. +One particularly special feature are the black book shelves, which simultaneously act as wall cladding, parapets and railings for the stairway. +The appearance, feel and scent are foreign. +Even die-hard architects and construction engineers shake their heads at the unknown construction materials. +"Here we wanted to work with recyclable materials," explained Joop Trouborst, Project Manager for the Municipality of Spijkenisse, on request of the Standard. +And thus one day we stumbled across a suitable waste product used in agriculture, on a Frisian farm. +For many years, millimetre-thick artificial fabric has been used in greenhouses and fields in the Netherlands as a base layer. +It is inexpensive and saves time. +The thin textile lasts for two seasons and is then disposed of as bulk waste. +For the library, the fabric was - for the first time in these quantities - pressed into four-centimetre-thick boards. 
+Under heat and pressure, the so-called Landbouw plastic (KLP) changes colour to a dark, homogeneous and robust material, that smells like a mixture of new car smell and the smell of trainers. +After 105 steps you have reached the summit. +At the end of the 500-meter-long journey, you are rewarded in the Literature Café, not only with a fantastic view of the city, but also with Dutch croquettes and potted ficus trees. +These provide atmosphere, but most importantly, regulate the air humidity in the literary mountain range. +Donations for the new soul +"You would hardly believe it, but this building, in spite of the many glass panels, is a showcase project in the area of ecology," said Trouborst. +It is heated and cooled using geothermal heat. +Although the Bücherberg has a glass cover, the sun only shines only briefly into the interior, even on sunny days. +The broad, laminated wood glue beams positioned at right-angles to the glass facade, provide shade and absorb the majority of the sunlight. +The indoor temperature is very pleasant. +The rest is taken care of by fully automatic roller blinds. +Stefan Spermon, initially a sceptic of the IT sector, has already ventured into the new library. +Lisette Verhaig has also visited already. +So too has TCM-teacher, Cynthia Bogarde, who even refers to the Boekenberg as Spijkenisse's "long overdue soul." +The reason: At the inauguration just a few weeks ago, every citizen was invited to donate a book from his/her personal collection. +This was, for the time being, to fill the optical gaps in the not yet fully stocked library - currently there are 70,000 items. +The concept has been a success. +The shelves are full to capacity. +"Nothing is worse than a half-empty library," said architect Winy Maas. +"I think that, thanks to our invitation, every resident now has a certain bond with this new building. +Everyone knows that their book is part of the building. +Even if it's just for decoration. +As such, MVRDV have succeeded in mastering the master discipline that specialist jargon refers to as the formation of identity. +Spijkenisse has written literary history. +However young and uneducated it may be. +This is ultimately a starting point for identity. +Szabo: "Germans must play a greater role" +In the vote on the incorporation of Palestine, Germany abstained from voting. +According to Stephen Szabo, Expert in US-European relations, in so doing Berlin is walking a thin diplomatic line. +Deutsche Welle: At the beginning of the week, Germany had initially signalled that it would vote against the Palestinians' application for observer status within the United Nations. +However, Berlin subsequently abstained from voting. +Why? +Stephen Szabo: Germany does not support what the Israelis have done in Gaza. +Now, however, due to their special relationship with Israel, Germany must be cautious. +At the same time, however, I do not believe that it supports the American position either. +Germany wanted to demonstrate its independence - albeit without being too critical of Israel. +During the uprising in Libya in March 2011, Germany likewise abstained from voting, when it came to establishing a no-fly zone. +This was ultimately implemented by NATO. +Does Germany find it difficult to adopt a clear position when it comes to important international affairs? +Yes, it does. +That is because it has just reorganised its foreign policy, indeed moving away from a policy that was, so to speak, managed by the USA, in favour of a German foreign policy. 
+This situation is aggravated by the fact that the Europeans do not have a coherent and standardised policy. +The Germans thus find themselves caught between two fronts. +It is expected of them that they play a more independent role, yet this is something that they are not accustomed to. +I believe that they are still finding their way in this role, but they are en route to a "more normal" foreign policy. +A foreign policy similar to that of France, or Great Britain. +So what specifically does a "normal" foreign policy entail, from a German perspective? +It shows a willingness to adopt positions on international matters, which are independent of those of the USA or European partners. +I believe that the German foreign policy is motivated by the economic policy, that is, by export and its relations with certain regions such as Russia, China or the Near East. +Germany's economic interests are to a certain extent different from those of the other major powers and therefore Germany must protect its interests. +Have these economic interests had an influence on their attitude towards the Near East conflict and their voting in the UN? +On the one hand, Germany has major revenue markets in the Near East, and particularly in the Gulf States. +Therefore it must be careful not to affront the public, but also the elite in the Arabic countries. +In any case, this plays a role. +However, I wouldn't want to ascribe too much weight to this. This is not an entirely one-sided relationship. +Nonetheless, it does play an important role in Germany's considerations. +Has Germany damaged its relations with the USA, by abstaining to vote on important decisions, such as the vote on Palestine? +I think that in Europe, and even in the USA, a great understanding for the German position prevails. +Therefore I do not think that this was as dramatic a fracture as was the case in the matters regarding Libya. +Perhaps it will even earn Germany a certain degree of respect. +After all, it signals that the country must be taken seriously as an international player and that its interests must be considered. +In Europe there are diverse opinions regarding the Palestinian initiative. +The USA, on the other hand, have spoken out clearly in favour of a veto. +Are there differences of opinion between the USA and the many European nations? +Due to the American domestic policy, these differences have always existed. +I think that secretly, the government under Obama actually has a great deal of understanding for the European situation. +However, due to the political situation here, the government is naturally unable to voice this position publicly. +It is my belief that the actual differences in opinion are not so vast as they always appear. +If you look at the relations between Obama and Prime Minister Netanjahu, Obama is really not quite so enthused by Netanjahu's policies. +Does Germany find it difficult to reconcile its close relations with Israel and the USA on the one hand, and the position of its most important partners in the EU on the other? +I think that this is precisely what makes things so difficult for the Germans. +It would of course be a little simpler for the Germans if there were a coherent and standardised European policy, which is currently not the case. +Thus they are unable to be part of a wider authority and must instead drive matters forward from their own position. +This is precisely what they are doing with the Euro. 
+I believe that in the future Germany will take on a leading role in urging Europe towards a standardised European position. +This is, of course, no simple task for Germany, on account of its relations with Israel. +This has always been a sensitive subject. +Yet I do think that Germans are clear that they must play a more independent role. +Does Germany view itself as playing the role of an important international player - does Germany actually want to assume a leading role? +Or does Germany still find leadership roles difficult? +Germany is still not used to it, the country continues to be uncomfortable and, for obvious reasons, still finds it difficult to play a more prominent role. +If we look at the Euro crisis for example, every time that Germany assumes a more prominent role, various anti-German feelings become apparent. +This does not make matters simple for the Germans. +This is actually the same old problem: one does not want to be surrounded by hostile countries. +From this stance, Germany is in a much more difficult position than the USA. +It must be receptive to the most diverse of neighbours and opinions, and this is not easy. +The influence of the USA over European politics is continually diminishing, yet the EU is currently not feeling this vacuum, so who is filling the gap? +The Germans will simply have to play a greater role. +Even if they do not like it, even if it is uncomfortable and makes them even more unpopular - c'est la vie! +Stephen Szabo is associate director of the Transatlantic Academy in Washington, an institute in which academics and political experts from Europe and North America come together to research the challenges of the transatlantic community. +Szabo is also a member of the German Marshall Fund, in which he has specialised in German policy, US foreign policy and transatlantic relations. +"Brand protection" in China: When Puma and Armani suddenly become Chinese +Armani is a world-famous brand, Polo Ralph Lauren likewise. +However, what is Armani Polo? +Behind this name hides a fully officially registered brand in China, however, one that has nothing whatsoever to do with the original companies. +Nonetheless, it is enjoying protection, provided the actual creators of the names do not sue. +And even then it is not clear whether they will have any rights. +"It is becoming increasingly more difficult for foreigners to protect their brands in China," said Thomas Pattloch, lawyer within the Taylor Wessing law firm, who specialises in copyright infringement in the Far East. +Every week a new case lands on my desk. +All the copycats require are a few additional letters in order that they can register their brands. +Thus Gucci simply becomes Lu-Gucci, Prada-Kny is registered in place of Prada. +German companies are also 'legally' copied in this manner, such as manufacturer of sporting apparel, Puma. +Pattloch opens a file containing registrations with the trademark office in Peking. +On 14 September 2010 a Chinese company copyrighted the brand name Zegna DF Puma there, an alias that also helps itself to the name of fashion retailer Ermenegildo Zegna. +The fact that the Chinese are world champions in copying and infringing on intellectual property is well-known. +In the major cities there are multi-level department stores that sell counterfeit goods almost exclusively. 
+Pattloch's cases, however, are slightly different: on behalf of his clients he takes action against the fact that the trademark office can grant Chinese companies, fully officially, the right to use a name that is already protected elsewhere. +The Chinese call this "Bang Mingpai," a passenger brand. +The word is based on "Bang Dakuan." +This refers to women who latch onto rich men. +The Chinese authorities are unaware of any wrongdoing. +"This harms business and we must fight against it," challenges Pattloch. +"The brand is watered down, its uniqueness disappears - the image damage is enormous." +The financial losses and process costs of the affected branches run into the millions, especially in the case of expensive flagship products. +According to information from market research company CLSA, with a volume of 15 billion euros annually, China is the third largest market for luxury items, and the fastest growing. +However, the deletion of dubious entries in the trademark registry is difficult to achieve, and costs a pretty penny. +The process can last for up to nine years, with an uncertain outcome. +Pattloch reports instances in which the court dismisses cases, because after a long period of time, the name to which the objection is being raised has become a "market reality." +If the complainant is unlucky, he may even have to pay the plagiarist money for having infringed on his trademark in China, said Pattloch. +Sometimes the law of the jungle prevails here. +Famous cases also relate to graphic elements. +In 2009, Daimler lost a legal battle with the construction machinery manufacturer Sany, the company that recently acquired German concrete pump manufacturer Putzmeister. +Even today, the Chinese company is therefore permitted to use an emblem that resembles the Mercedes star. +Volvo-purchaser Geely originally used a blue and white logo that resembled the BMW logo; the dispute was arbitrated and Geely was forced to change it. +Fashion house Lacoste lost a suit in China against copycats from Hong Kong and Singapore, who were using the famous crocodile looking in the other direction. +The Chinese authorities are unaware of any wrongdoing. +The CTMO trademark office in Peking does acknowledge that there were bottlenecks in 2010 due to limited staffing and equipment. +In the past year, however, things reportedly "returned to normal following this emergency situation regarding the work flow." +Thus the stock of unprocessed appeal proceedings was reduced by 22 percent. +Almost 57,000 such cases were closed, 75 percent more than in the previous year. +Nonetheless, there are still 81,500 appeals waiting to be resolved in the office. +To remedy this is very expensive +As is so often the case in China, the figures are imposing. +In the past year, more than 1.4 million applications for trademark protection were submitted to the CTMO, almost one third more than in 2010. +This is a record and means that China, for the tenth time in succession, is the global leader when it comes to new trademark applications, the authority reported. +The same applies to the inventory of valid trademarks, totalling 5.5 million in number. +In 2011, 1.8 billion yuan in fees were received. +Put simply, this means that each application costs on average 1,280 yuan, or 160 euros. +To appeal against an application costs many times this amount, as can be seen in the case of the German family business, Freudenberg. +For more than seven years, the group has been battling a Chinese plagiarist.
+The Germans did in fact manage to expose the company's illegal manufacturing of copied motor vehicle parts. +However, the copycat still secured the Chinese rights to the Freudenberg brand. +"This is something we missed ourselves, as family names cannot be protected in Germany," said Hanno Wentzler, Chairman of the Board of Management at Freudenberg Chemical Specialities in Munich. +The CTMO trademark office then also dismissed the Munich-based company's appeal. +In the next two instances, Freudenberg was proven right; however, the opposing party continues to contest the matter to this day. +You have to pay extremely careful attention. +The matter is now pending before the Supreme Court. +Wentzler is confident that the matter will be brought to a positive conclusion and praises the professionalism of the courts. +However, he also says: "The process is extremely expensive and takes a lot of time, money and nerves." +The internal costs can barely be calculated; the company archive even had to look through century-old records in order to provide proof. +Five years ago Freudenberg unsuccessfully offered the opposing party a "high six-figure sum in euros" as settlement. +"This shows how much this is worth to us," says Wentzler. +The dangers in the Far East even threaten to spill over, back into Europe. +Particularly if imitators secure unprotected brand names there. +For example, a Chinese manufacturer wanted to register the Freudenberg label for shoes and leather in Germany. +This is a business sector that the group had long vacated, yet it nonetheless managed to prevent the registration. +"You have to pay extremely careful attention," says Wentzler. +Both he and Pattloch advise companies to be very careful when conducting business with China. +"It is not sufficient to rely on international trademark rights; rather, foreigners should also register 'everything' that is in any way worthy of protection in China as well," said Wentzler. +Otherwise the costs can be much higher than the registration fee. +In actual fact: if Freudenberg were to lose at the final hurdle of its trademark drama, they would probably have to pay the opposing party license fees for the use of their own name, explained Wentzler. +Or alternatively we would be forced out of the market in the respective sector. +World AIDS Day: Stomp, sing, help +In Heidelberg, the Imbongi choir is rehearsing - and in Swaziland, AIDS orphans are delighted. +The story of a link that overcomes far more than a distance of 8,733 kilometres. +First of all, the stamping: cowboy boots, basketball shoes, ladies' pumps and men's loafers attempt to find the beat on the parquet floor, and quickly do just that. +One-two-three-four. +Only then do the voices of the singers slowly swell - alto, bass, tenor and soprano surge, beguile and haunt. +And Fiete Hopf, the 29-year-old conductor, almost rises up out of his shoes as he brings the ensemble together with his smooth, yet wild gestures. +It is Monday evening and in the music room of the Institute for Medical Psychology in Heidelberg the Imbongi Choir are practising a new song. +The fifteen singers, aged from 23 to 69, range from human geneticists to the maintenance man. +"Om'Obani" is by no means a simple piece, with each voice having a different text, and in an extremely foreign language at that: Zulu, which is spoken by eleven million people in South Africa, Botswana, Lesotho, Malawi, Mozambique and in parts of Swaziland.
+Helping others to help themselves +There are around 34 million people infected with HIV around the world, as according to the estimations of Unaids, the United Nations' programme to battle AIDS. +Of these, 23.5 million live in South Africa. +In Swaziland, there are 245,000 AIDS orphans. +Meanwhile, more than 40 percent of the population are HIV positive. +The Voices for Africa Association has found sponsors in Germany for 180 AIDS orphans in the village of Esitjeni. +70 of these attend a secondary school. +For 15 or 20 euros per month, you can become a sponsor. +This guarantees the child money for school, a school uniform and a warm meal each day in the Gogo Centre. +In Zulu, Imbongi means storyteller or worshipper. +In this region, no-one can speak the Bantu language fluently, but they can sing it. +For almost ten years the choir has been practising songs in this foreign, 'soft' language, and now and then they bring them back to where they originally came from: the South of Africa. +For an 8,733-kilometre flight away from Heidelberg, in the north west of the Swaziland Kingdom, lies the village of Esitjeni, which relies on the vocal power of the German choir. +Forty percent are infected. +Around 2,000 people live there, some still in simple mud and straw huts, and the majority of them are children. +More than 300 of them no longer have parents, as they succumbed to the HIV virus. +In Esitjeni you get a small foreshadow of the illness from which all of Swaziland is suffering: according to Unicef, the region has the highest HIV infection rates and the lowest life expectancy in the world. +Circumcision, which has been proven to reduce the risk of contracting the virus by half, is barely practised by the population. +More than forty percent of people in the Swaziland carry the immunodeficiency virus, and dying in you mid-thirties is by no means rare. +On a group trip to Africa in early 2005, the Choir visited the village, but first and foremost, the Imbongis saw many children on the streets, lacking not only in parental care but in practically everything else as well: food, clothing, education. +Without a school leaving certificate, there are barely any opportunities, particularly in a poor country. +Initially it was the private commitment of individuals to send a child to school and enable him/her to have one warm meal a day for a few euros per year. +However, just one year later, the choir established the "Voices for Africa" Association, which since then has been looking after the AIDS orphans in Esitjeni at an almost professional level. +Facts on sexually transmitted infections. +What are the most important sexually transmitted diseases? +Bacterial STIs include syphilis, chlamydia and gonorrhoea. +Common viral STIs are HIV, human papilloma viruses, herpes genitalis and hepatitis. +Crabs and scabies belong among the parasitic STIs. +Who are the main affected groups? +Syphilis and gonorrhoea occur primarily in men that have intercourse with other men. +The Robert Koch Institute understands that at least four in five of all syphilis cases reported in Germany are transmitted by means of sexual contact between men. +Among heterosexual adults, chlamydia infections, trichomoniasis, candidiasis (fungal), gonorrhoea and human papilloma viruses are frequently occurring sexually transmitted diseases. +The spread of HIV among heterosexual adults in this country is relatively low; however, around 20 percent of newly contracted cases of HIV are found in this group. 
+Among young people, chlamydia infections are much more common than in other population groups. +According to European surveys, three quarters of all infections affect young people between the ages of 15 and 25. +In this country, human papilloma viruses are also frequently found in young people. +How has the number of infections developed? +Not all sexually transmitted diseases are notifiable. +According to the Robert Koch Institute, the number of syphilis infections has more than doubled, from 1,697 cases in 2001 to 3,698 cases in 2011. +The number of newly contracted cases of HIV has been on the decline since 2007. +In 2011 there were around 2,700 cases. +This is around one tenth fewer than the previous year. +Which symptoms indicate a sexually transmitted disease? +The infectious diseases can cause ulcers in the genital area, discomfort when urinating, discharge, lower abdominal pain and blisters or warts. +However, often they cause no pain or any other symptoms, thus remaining undetected. +How can you protect yourself? +Condoms can reduce the risk of contraction; however, they do not offer 100% protection. +This is because occasionally, the pathogens of sexually transmitted diseases can also be passed on via smear infections and close bodily contact. +Therefore, first and foremost experts recommend that people with frequently changing sexual partners undergo regular examinations. +If diagnosed early, the majority of STIs can be cured and long-term consequences avoided. +Through sponsorships, donations and, by no means least, the funds that the choir raises across the whole of Germany, the money all adds up. +"In total, we have already sent around 200,000 euros to Esitjeni," said Annette Lennartz, Chairperson of the association. +In the village itself, Zodwa Dlamini, a self-assured and assertive woman, manages the money from Germany. +She makes sure that the orphans have good accommodation, for example with one of their grandmothers. +The Gogos, as the old ladies are called in Zulu, are the pillars of the village. +Some of them have up to 14 orphans living with them, providing them with a roof over their heads and making sure that the children get to their school classes punctually every day, in their school uniforms. +Anyone who doesn't have anyone left arrives at the shelter with Khanyisile, a single woman who earns the same salary from the association as the two cooks who cook for more than 200 hungry children every day. +In addition, "Voices for Africa" has established a sewing school, built two chicken coops and, together with the American health organisation, PSI, organised for many in the village to be tested for HIV. +This is nothing to be taken for granted, given the attitude towards illness throughout the entire country: the best way of keeping things under wraps is if people are dead. +A king with 14 wives +"AIDS is an absolute taboo subject," said Annette Lennartz, "because it is associated with sexuality." +This is actually strange for a country in which the king officially has 14 wives. +The last absolute monarch of sub-Saharan Africa, King Mswati III, is known for his excessive lifestyle. +Polygamy in place of democracy. +Among other factors, the fact that the HIV virus has spread quickly over the past number of decades can also be attributed to this officially sanctioned lifestyle. +Another factor is the large number of migrant workers who carry the virus across the country. +"There are free condoms on every corner," said Annette Lennartz, "but they are hardly used.
+The culture prescribes otherwise - flesh to flesh." +In order to promote the cultural exchange, the Imbongi choir travels through Southern Africa every two or three years and sings songs of melancholy, fighting spirit, confidence and black self-esteem, which many from the southern tip of the black continent still know from the times of apartheid. +A bus full of white people, who sing songs in a black language - this degree of recognition brings not only morale and joy; some grim-faced border soldiers even shed a few tears. +The journey always leads to Esitjeni, where the singers visit their sponsor children. +Even though you can barely find the small village on a map, it is more than well-known in the valley of the Ezulweni River. +"Go to Esitjeni, that's where the light is," say the people there. +And if you make the 8,733-kilometre flight back to Heidelberg, to visit the stomping singers in their rehearsal room, you'll see that the light is there too. +Messenger: NASA discovers ice on Mercury +The Messenger probe has found evidence of ice on the planet Mercury. +It is thought that the ice cover may be up to 20 metres thick. +The US space agency, NASA, has proven the existence of ice on the planet Mercury. +Although the planet lies closest to the sun, it does have frozen water - as shown in three studies published on Thursday in specialist magazine "Science." +The Messenger probe has found evidence that there is an ice cover in the region of the planet that lies permanently in shadow. +This is thought to be at least 30 centimetres and perhaps up to 20 metres thick. +The water presumably came from comets or perhaps also asteroids that impacted with Mercury. +However, no-one is linking the discovery of ice with the existence of life on the planet, said Chief Scientist for the Messenger probe, Sean Solomon. +The temperature on Mercury can reach up to 426 degrees Celsius. +That said, the findings could help explain how water and other building blocks of life reached other regions of the solar system. +Unknown to the majority of the Earth's inhabitants, there are probes, telescopes and small robots such as the Phoenix, deployed to research the depths of the universe. +From time to time, they transmit images to Earth: small peepholes into the infinite expanse. +This image comes from a camera developed by German researchers at the Max Planck Institute. +The eight planets of our solar system, plus the dwarf planet Ceres. +Like Pluto, which orbits around the sun behind Neptune, Ceres is not a planet according to the new definition of the term issued by the International Astronomical Union in 2006. +This image section from an infrared recording by the Spitzer telescope shows a "family portrait" of countless generations of stars: the oldest stars are seen as blue dots, while more difficult to identify are the pink-coloured "new-borns" in the star delivery room. +This star-forming region - rather unromantically named W5 by scientists - was discovered by the Spitzer telescope in the Cassiopeia constellation, at a distance of 6,500 light years. +This shimmering glow of a dying star was captured by NASA's Spitzer telescope. +The donut-shaped ring consists of material ejected by the star in the process of dying. +In the huge Trifid Nebula, 5,400 light years away from the Earth, new stars are created from gas and dust. +NASA's Spitzer telescope shot this photo of the galactic delivery room.
+The Pleiades star cluster, also referred to as "The Seven Sisters," can be seen with the bare eye at night. +With the telescope, however, the colours really come into their own. +In this infrared photo, the Helix Nebula looks back at the observer like a red eye. +It is located 700 light years away in the Aquarius constellation. +Its similarity with the continent resulted in this Nebula acquiring the title 'North America'. +A combination of normal and infrared photography produced the spectacular colouring. +This baby star could only be captured in its full beauty using the Spitzer telescope's infrared detectors. +Saturn and its rings: How these occurred is the greatest puzzle in the field of astronomy. +Perhaps they are the remnants of a moon of Saturn, which disappeared without a trace 4.5 billion years ago. +One of the largest and sharpest pictures from the Hubble telescope: the Whirlpool Galaxy +Depending on the colouring, photographs of spiral galaxies can become genuine works of art. +The photograph published by the European Southern Observatory shows the Trifid Nebula in the Sagittarius constellation, several thousand light years away. +The name Trifid stems from the Latin word trifidus (divided into three parts), as dark stripes of dust divide the core of the birthplace of stars into three parts. +In the Ophiuchus constellation, astronomers have photographed the signs of a cosmic collision: 400 million light years from the earth, the cores of two merging galaxies move rapidly towards one another, destined to collide. +This star birth was captured by the Hubble telescope in the M83 spiral galaxy. +Anyone who doesn't like technical abbreviations may prefer to call it by its nickname, the Southern Catherine Wheel. +The photo taken by the Hubble space telescope shows a section of the Iris Nebula in the Cepheus constellation. +The nebula, 1,400 light years away, consists of particles of dust that are ten to one hundred times smaller than standard house dust. +This image was put together from the X-ray images captured by various telescopes. +It shows a ring of black holes, 430 million light years away from the Earth. +This group of galaxies, named Arp 273, was pictured for NASA by the Hubble space telescope. +Scientists call the larger spiral galaxy UGC 1810. +This star nebula is home to the brightest group of young stars in our Milky Way. +This 'star cradle' continually produces new youngsters. +Likewise, this star cloud, connected to the Rosette Nebula, continually produces new baby stars - 5000 light years away from the Earth. +In this bright shining galaxy with one small black hole, there exists no dust - only gas. +Researchers presume that it only came into being shortly after the Big Bang, when the universe was comprised primarily of hydrogen. +Our view of the universe: the most important telescopes +The telescope is thought to have been invented in 1608 by Hans Lipperhey - even before Galileo Galilei used the device to observe the stars one year later. +Since then, the mirrors in optical telescopes have become increasingly large and the insights that they provide increasingly profound. +For a period of 30 years, namely from 1947 until 1975, the Hale telescope in the Palomar Observatory near San Diego was the largest telescope in the world. +The mirror, shown in the image, had a diameter of five metres. +Arizona, USA,is home to the Large Binocular Telescope. +It enables views of space via two mirrors, each with a diameter of 8.4 metres. 
+The inner workings of the Gran Telescopio Canarias on the Canarian island of La Palma are huge - the mirror alone has a diameter of 10.4 metres. +The mirror of the Southern African Large Telescope in South Africa is segmented - to reduce costs. +In spite of this it achieves a diameter of around eleven metres. +The disadvantage of this inexpensive construction method: the telescope is securely clamped at its angle of inclination and its movement is therefore limited. +The Hobby Eberly telescope in Texas also has a fixed angle of inclination. +What sets it apart: the high light-gathering capacity. +This - in spite of its comparatively low mirror diameter - even matches that of the world's largest reflector telescopes. +With the help of a radio telescope in Arecibo (Puerto Rico) researchers can listen for extraterrestrial signals in space. +The radio telescope has a diameter of 305 metres. +In the "Search for Extraterrestrial Intelligence" (SETI) every computer owner can be of assistance, by making his/her processing capacity available. +View of the European Southern Observatory (ESO) in the Chilean Andes. +This is home to the Very Large Telescope, which lives up to its name. +With its total of four mirrors, the telescope can also focus on the medial infrared spectrum. +Likewise to be located at the ESO Observatory in Chile, the European Extremely Large Telescope is also being planned. +Its main mirror is to span a full 42 metres and will be made from almost 1,000 mirror elements. +However, images are not to be expected until 2018 at the earliest. +Until 2007, the two Keck telescopes at the Hawaiian volcano, Mauna Kea, were the largest in the world. +They each have two mirrors, each with a diameter of ten meters. +The Keck Telescopes are part of the Mauna Kea Observatory, which alongside the Keck telescopes, can look to the heavens with the help of the Subaru telescope and the IRTTF. +Another huge new telescope is also to be built on the Mauna Kea, with a mirror diameter of thirty metres. +Here you can marvel at an artist's impression. +However, the most important insights into space are provided by the Hubble space telescope. +Since 24 April 1990 it has been supplying images of distant worlds. +Since March 2009 the Kepler space telescope has been searching for extra-solar planets, especially for any that may be inhabitable. +On 2 February 2011 it was announced by NASA that 1,235 planetary candidates had been identified since the mission began. +The image documents the final launch preparations on the Kepler space telescope. +The James Webb Space Telescope (JWST) will be launched into space on board an Ariane5 rocket by 2018 at the earliest. +The primary mirror of the infrared space telescope has a diameter of 6.5 metres. +One of the telescope's tasks is to search for light from the first stars and galaxies that emerged after the Big Bang. +Scientists are assuming that ice also exists at Mercury's south pole. +However, there is no reliable data in support of this as the Messenger orbits around the planets much closer to the north pole. +For decades, radar measurements have indicated that there is ice on Mercury. +Thanks to the Messenger probe that was launched in 2004, the first to orbit Mercury, scientists can now be certain. +Drink butter on a daily basis - and live to 168 years of age +In Southern Azerbaijan, many people reach biblical ages. +There is even a museum of longevity. +A hunt for evidence in the country in which 97 years old is still comparatively young. 
+In Southern Azerbaijan, many people reach ages that can almost be considered biblical. +There is even a museum of longevity. +A hunt for evidence in the country in which 97 years old is still comparatively young. +The journey through the Talysh Mountains can be described as wild and romantic. +The minibus rumbles over the winding streets, past densely wooded hills, raging rivers and simple farmhouses. +Everywhere is green and lush - you could be forgiven for thinking you were in the Black Forest. +However, this is the deep south of Azerbaijan, and the border with Iran is just a few kilometres away. +This is the home of the Caucasian people group, the Talysh, of whom not much is known except that they speak perfect Persian and Azeri and live long lives. +The final stop is Lerik. +The small town is bursting with overpowering architecture from Soviet times, which doesn't fit with the picturesque mountain landscape at all. +Tourists from Europe rarely come here; the journey from Azerbaijan's capital city, Baku, is too arduous. +It takes eight hours to travel the 323 kilometres, as too much of the route is just a single track. +The fabulous wealth, for which the country has its oil in the Caspian Sea to thank, has not yet arrived here in the province. +Yet Pilata Fatulayeva (48) is convinced that Lerik has what it takes to be a tourist attraction. +"Baku became famous in May due to the Eurovision Song Contest, and next year we are having a festival to celebrate the oldest people in the world," said Fatulayeva. +She is the Director of the Museum of Longevity, most likely the only one of its kind in the world. +Here the lives of eight dozen Talysh from the area who lived to older than 100 are documented. Fatulayeva points out a black & white photo. +This here is my grandfather, he was 120 years old. +At the age of 136 he fathered another child. +However, the unrivalled star of the museum is shepherd Shirali Muslimov, who is said to have lived to 168 years old. +However, no birth certificate exists to confirm this. +And given that the longest confirmed lifespan was 122 years of age, Muslimov's claim seems extremely doubtful. +"He was born in 1805, here in the region, and died in 1973," explains Fatulayeva. +The man married three times and had 23 children, and is said to have fathered another daughter at the age of 136. +So did Shirali Muslimov miscalculate his age by a couple of decades? +But Rembrandt Scholz, researcher on ageing at the Max Planck Institute in Rostock, has also heard of people living to impressive ages in Central Asia. +"A strikingly high number of extremely elderly people can also be found in some areas of China, in Japan or the Hunza Valley in Pakistan," said Scholz, "while there is also an extremely large number of very old men in Sardinia." +Due to a lack of documentation, however, there is no scientific proof of age, particularly as there are no birth registers. +Melted butter by the glass, every day +However, the fact remains that the people of the region surrounding Lerik reach a biblical age with striking regularity. +There are currently 20 individuals older than 100 years of age. +So why do so many very old people live here in the south? +The Azeri travel guide Farid Mugimzadeh explains this as being due to the special Talysh genetics. +In contrast, Museum Director Fatulayeva believes that it is due to diet.
+However the notion that the calorie-rich diet of the Talysh, who love meat, bread and especially dairy products, and of whom many drink a glass of melted butter on a daily basis, could be considered healthy from a nutrition science perspective does not really seem plausible either. +Or is it the traditional way of life that keeps the people young? In Cengemiran, a tiny settlement not far from the town of Lerik, lives Rubaba Mirzayeva. +At 97 years old she is still comparatively young for the area. +Mirzayeva, who claims to have 143 descendants, lives in a simple wooden house, which is typical of the entire Caucasus region. +She sits on the floor with a butter churn, which she rolls backwards and forwards tirelessly. +Eight people live here under this roof, including one of Mirzayeva's sons and a daughter, both of whom have been grandparents for some time. +There are also two small children running around. +In the kitchen, tea is prepared for the guests, which is served in typical, bulging Armadu glasses. +Mirzayeva's white teeth stand in perfect rank and file, beneath her headscarf she conceals long, dark blond plaits, which her son proudly reveals for us. +I have always washed my hair with milk, and it has never fallen out or lost its colour. +"I have never used shampoo either," said Mirzayeva. +Monthly pension is enough to live on +She has only ever eaten what she could get from her own farm - tomatoes, potatoes, peas. +My whole life I have never once bought groceries in the supermarket. +Then she tells of her husband who was in the army. +Things were at their worst during the time after the Second World War. +However, everything became better when the "beloved father" Heydar Aliyev took the rudder. +The propaganda seems strange coming from the mouth of an old lady. +Yet the cult that revolved around the father figure for the nation, who governed his country like a dictator practically knows no limits in Azerbaijan. +He held power until 2003 and his son Ilham later took over the helm. +At least there is no deprivation among Azerbaijan's elderly. +Mirzayeva receives 230 Manat (around the same sum in euros) per month as her pension, which in a local context is an amount on which one can live comfortably. +And perhaps Mirzayeva's long greying son is right: "The elderly enjoy a deep respect in our culture." +They live among their extended family, are loved, cared for and are happy. +If this is not a reason to live for as long as possible, then what is? +The notion of "human rights" is omitted from the constitution. +The revolution has returned to Cairo. +Competing demonstrations in Cairo reveal the deep division within the country. +The future constitution based on Sharia law is fiercely disputed. +The Egyptian President is not holding back his emotion. +We must make the transition. +"And making sure it succeeds is my responsibility, before the people and before God," he said on state television. +His speech was aimed at the entire population,however in particular at the Coptic Christians, the liberals, enlightened Muslims and secularists. +For all of them, until now hopelessly estranged in a bewildered opposition, are fearful. +They are fearful of a God State on the Nile at the mercy of the powerful Muslim Brotherhood. +According to Mohamed Mursi, speaking almost apologetically, he has temporarily restricted the authority of the constitutional court and increased his own authority, "in order to rescue the revolution." 
+However, Egyptians - and the world - are not entirely sure what the 61-year-old engineer who holds a Doctorate from the American University of Southern California, really wants to save. +Should the judiciary be deprived of power? +In actual fact, the 234 articles, which have been pushed through by the Islamic-dominated 110-person Constituent Assembly, are in some aspects cause for concern. +As was also the case under previous constitutions, under the draft judicature is justified on the "principles of Islamic law." +Yet what are "principles"? +This was and remains subject to interpretation and there is concern that the Islamists will make use of the woolly formulation and the resulting room for legal manoeuvre in favour of a stricter interpretation of Sharia law. +This is at least suggested by one newly added article: in all issues affecting Sharia law, the Al Ashar University must be consulted, the country's most important Islamic institution, which has great influence throughout the whole of Sunni Islam. +This can, but does not necessarily have to mean that the clergy will oversee legislation, which would result in the de facto incapacitation of the judiciary. +Much in the constitutional draft is open to interpretation +Also problematic: civil military jurisdiction will continue to be upheld. +During Mubarak's rule, these courts served to suppress opposition. +Following the fall of the dictator, up to 11,000 civilians were under military imprisonment. +According to the draft, the state should also protect "the true character of the Egyptian family, and promote its morals and values." +From a legal perspective, this is formulated in such an unclear manner that state institutions could even use this article to control the content of cinematic art and literature. +In plain language, this is nothing other than censorship. +Incidentally, no article explicitly establishes the equality of men and women. +Another does prohibit the insult or slander of the prophet Mohamed and his emissaries. +However, what constitutes an insult and how this should be sanctioned remains unclear. +Equally dubious is the formulation stating that "insulting people" is forbidden. +Is a caricature of the president sufficient, or a joke at the expense of a jurist? +Open to interpretation, like so much in the draft submitted by Mursi to be signed and that, in his own words, will be submitted to Egyptians for referendum "very soon." +"The revolution is back" +For weeks the opposition has been gathering to combat the superior strength of the Islamists. +Tens of thousands gathered on Friday evening at the Tahrir Square in Cairo, in unfamiliar unity, and pledged to bring down the charter before it has even come into effect. +"The revolution is back and we are going to be victorious," said Hamdin Sabbahi, third place candidate in the presidential elections. +Noble Peace Prize winner and former Head of the International Atomic Energy Authority, Mohamed El-Baradei explained that the constitutional draft belongs "on the rubbish tip of history." +Via SMS service Twitter, he accused Mursi's followers of wanting to lead "a coup against democracy." +"If he calls for the referendum, we will go to his palace and overthrow him," said member of the opposition Jasser Said. +"We have not yet grown tired, the blood of our brothers has not yet been atoned for," stated the Egyptian media, quoting opposition politician Chaled Ali. 
+And several judges have signalled that they do not want to oversee the referendum, which would render it invalid. +"The Koran is our constitution" +The well-organised Muslim Brotherhood gathered for a counter-demonstration, although acting cautiously they did not choose the Tahrir Square but rather a mass prayer on the other side of the Nile, outside the Cairo University. +Many veiled women and followers of the Salafis took part, shouting out: "The people demand the application of God's law." +They demanded of Mursi: "Cleanse the country!" and protested: "The Koran is our constitution." +A struggle for control over the symbolic Tahrir Square, where everything began, would have most likely provoked events verging on civil war. +Quite clearly, this was something that Mursi's followers did not want to risk. +The Muslim Brothers stated that both those against and those in favour of the constitutional draft had expressed themselves loud and clear. +Now is the time to let the population decide at the ballot box, in which direction the country should move forward. +It is a certainty that there is a majority in favour of the Islamists' draft. +"The term 'human rights' does not even appear once" +Hafez Abu Saeda is furious about this forced constitutive process, which actually should have lasted until February and should have involved all social interest groups. +The 48-year-old human rights lawyer and Chairman of the Egyptian Organisation for Human Rights (EOHR) defended the Muslim Brotherhood, when imprisoned or in court under Mubarak. +Not because he shared their world view, but because for him, human rights are indivisible. +For this he was battered, condemned and imprisoned. +"And now the term human rights does not even appear once in the new constitution," he bemoaned in a discussion with "Welt am Sonntag." +The lawyer has resigned himself to Mursi extending his power to all three branches of state government. +These measures are blatant breaches of the ground rules of democracy and will guide Egypt into a new dictatorship. +"Instead of strengthening the civil society, the President is effectively suspending it," complained Saeda. +Yet without civil society organisations, a democracy cannot function. +Saeda feels abandoned,even by the international community, which is observing the battle over the ideological direction on the Nile with a mixture of curiosity and excitement. +This could come back to haunt them. +One demonstrator at the Tahrir warned: "You are letting loose a monster that you can no longer control." +Norway's rakfisk: Is this the world's smelliest fish? +Norway's five million people enjoy one of the highest standards of living, not just in Europe, but in the world. +Could the secret of the country's success be connected to the local appetite for some exceedingly smelly fish? +Take a selection of over-ripe cheeses. +Place them in the midst of a pile of dirty, wet soccer kit. +Leave for a week. +Now you have the nose-numbing smell of rakfisk, one of the great Norwegian delicacies. +I am in the small town of Fagernes, about three hours from Oslo. +There is snow, spectacular scenery - and that odour, ever present, hangs in the air. +Rakfisk is trout sprinkled with salt and fermented in water for - depending on how smelly you like your fish - up to a year. +As the dark sets in and the weather turns cold, Norwegians flock to a festival here in Fagernes devoted to this most, well, captivating of foods. 
+"You eat it raw, and then swallow a glass of aquavit," says Havard Halvarsen, full-time local firefighter but also the so-called "Rakfisk General," in charge of running the festival. +All around us people are eating little cubes of the fish and knocking back quantities of drink. +"Some people like the aquavit more than the rakfisk," says Havard. +The drink can kill the smell. +I try a few pieces. +If you can avoid passing it under your nose, it is not bad - not unlike a slice of sushi that has been on rather a long bus journey. +Rakfisk is a product of very different, poverty-stricken times in Norway when, pre-refrigeration, fish was soaked in airtight barrels of water and salt in autumn. +Then in the depths of winter, well and truly fermented, it is taken out and - no doubt with the senses knocked out by alcohol - eaten. +Only a generation ago, thousands of Norwegians were forced to leave their country in search of work, emigrating mainly to the US. +Now the population is expanding fast - more than 13% are immigrants, attracted by plentiful jobs, high wages and a comprehensive care system. +People from Sweden, the old rival and not so long ago far richer than Norway, stream in to work. +Rakfisk is seen as signifying something important, a vital if rather smelly part of Norway's past. +It is among the more expensive dishes you can buy. +But then everything is expensive - a small glass of beer or a sandwich knock you back £9 ($14) each. +Norway does not often make it on to the global news agenda - and most seem to like it that way. +People here are still loath to mention by name Anders Breivik, the right-wing, racist extremist who gunned down and killed 77 men, women and children last year. +Instead, the shootings are referred to as "the July the 22nd incident." +Norwegians find it very difficult to believe that in their peace-loving country one of their own was capable of such brutality and murder. +The growth since the early 1970s of one of the world's biggest oil and gas industries lies behind much of Norway's present-day wealth. +"But oil is not the only reason we are doing so well," says Anna our waitress, handing round trays of maturing rakfisk and, with her long blond hair and startlingly blue eyes, the image of Nordic well-being. +We are a - how you say - prudent people. +Her English, like that of most people here, is flawless. +We are not very showy, we do not like ostentation. +Norway has handled its oil wealth very carefully - all but a small percentage of money from the industry is invested in a special fund for the benefit of future generations. +When everyone else was throwing around money they did not have, in the years leading up to the global financial crash, Norway kept its purse strings tightly bound. +"As long as we can ski in winter and go hiking in summer we are happy," says Anna. +"And eat rakfisk," she adds with a carefree laugh. +I stand in the snow and queue for something to eat - I have had enough rakfisk. +Now an elk burger is certainly something different and rather succulent to the taste. +But in the evening, it is more of that smelly fish. +The hotel I am staying in is one of a number of venues hosting a rakfisk dinner where guests vote on the best - or perhaps the most nasally challenging - fish. +There is a live TV link up to a compere in a bow tie surrounded by plates of rakfisk. +It is like the Eurovision song contest. +"What score do you have for the best fish up there in the mountains Thor-Juergen?" +"Here are our points, Havard." 
+There is clapping, laughter. +A man falls off his chair, perhaps overcome with aquavit. +Or maybe it is the fumes from all that fish. +Mexico's Enrique Pena Nieto faces tough start +As Mexico's incoming President Enrique Pena Nieto prepares to take office, the BBC's Will Grant looks at the challenges facing him and the mixed expectations of his population. +Traffic in Mexico City is particularly bad at present. +A congested city at the best of times, a ring of steel has been erected since Monday cutting off several key routes into the capital and causing chaos on the roads. +The aim, however, wasn't to stop commuters getting to work but prevent protesters from reaching parliament. +On Saturday, Mexico's new president Enrique Pena Nieto will receive the presidential sash and take over the running of the nation. +He faces a complicated task. +Mexico has been performing well economically under the outgoing administration of Felipe Calderon, but the country is in the grip of a drug war, which has already claimed an estimated 60,000 lives in six years. +"My government has a great commitment to the Mexican people to reduce the violence," Mr Pena Nieto told US President Barack Obama in the Oval Office earlier this week. +I will be proposing a new security strategy which will allow us to achieve that aim. +Before rubbing shoulders with the US president, Mr Pena Nieto's previous political experience was as governor of his home state, the State of Mexico. +A populous, sprawling state surrounding the capital, opinions about the new leader are divided in his old stomping ground. +A straightforward man +In the bucolic town of Valle del Bravo, for example, he is remembered fondly. +Residents credit him with boosting tourism in the resort and building infrastructure. +To reach the town you can drive along one of Mr Pena Nieto's new motorways, a vast improvement on the cracked and bumpy roads it replaced. +Plaques bearing his name also hang outside a modern sports centre and an impressive interactive museum about climate change. +"We are looking to him to bring about real and lasting change," says friend and political ally Gabriel Olvera Hernandez, a state congressman for Mr Pena Nieto's party, the PRI. +Particularly in terms of security and the economy, we're hoping for an interesting and true change which our country so badly needs. +After an unbroken 81 years in power, the PRI was ousted in 2000 by Vicente Fox. +Congressman Olvera admits that after 12 years outside the presidential palace of Los Pinos, there is much expectation within the party about Enrique Pena Nieto. +And he rejects the opposition's characterisation of the new president as lacking substance. +He's a very straightforward man, very committed with an excellent vision of the country. +He's an excellent statesman and, above all, he's someone who knows how to listen. +But on the other side of the state, that is not the impression many people have of their former governor. +In Nezahualcoyotl, also known as Ciudad Neza, the contrast with the cobbled streets of Valle del Bravo couldn't be sharper. +Tucked away under motorway flyovers, it is in many ways a suburb of Mexico City itself. +And the problems in the municipality are also gritty and urban. +Earlier this year, the military was called in to help tackle the drug gangs operating in the neighbourhoods, and violence against women is particularly acute. +On a patch of wasteland by a vast landfill site, the bodies of dozens of murdered women have been dumped over the past two years alone. 
+More than 1,000 women were killed in Mexico State while Mr Pena Nieto was governor, a rate much higher than in the notoriously violent city of Ciudad Juarez - a place synonymous with the murder of innocent women. +Mr Pena Nieto's critics say, at best, he failed to adequately address the problem of femicide while he was in office. +At worst, they accuse his administration of turning a blind eye. +In a concrete home typical of the rundown neighbourhood, Irinea Buendia struggles to fight back the tears as she shows me photos of her late daughter, Mariana Luna. +According to the official version of events, Mariana committed suicide in 2010. +However her family believes she was murdered by her partner. +"When I arrived at her house it seemed her body had been washed," Senora Buendia recalls. +There were signs she'd been beaten, and rigor mortis had already set in. +As her mother recounts the story, a picture of Mariana looks down from the walls, next to a cross bearing a single word: Justice. +However, that is exactly what the family say they have been denied. +The state authorities have treated me like I'm an old gossip, a trouble-maker, a whiner. +What they want is that one simply accepts what they say and shuts up. +"But that can't be right when there were so many irregularities and omissions," she says. +As President Pena Nieto receives the sash on Saturday, it comes with a heavy responsibility. +Tens of thousands of families have been affected by violent crime in Mexico over the past six years and the new president has promised to make them a priority during his time in office. +"I hope he's the same kind of president as he was a governor," says PRI Congressman Olvera in Valle del Bravo. +That, however, is exactly what victims' families in Ciudad Neza most fear. +Bradley Manning didn't complain about mistreatment, prosecutors contend +Prosecutors try to counter Bradley Manning's claims of abuse in confinement +The hearing focuses on Manning's time in the military brig at Quantico, Virginia +Defense wants case dismissed on grounds that Manning's confinement was harsh +The Army private is accused of stealing thousands of classified documents +Prosecutors tried to establish Friday that Army private Bradley Manning -- charged in the largest leak of classified material in U.S. history -- missed multiple opportunities to complain about the mistreatment he's alleging he suffered in military custody. +While cross-examining Manning at a pre-trial hearing at Ft. Meade, Maryland, prosecutor Maj. Ashden Fein asserted that records of weekly visits Manning had with unit officers during nine months of detention at Quantico, Virginia, show no complaints about his treatment. +The cross-examination -- during a hearing on a defense motion to have Manning's case dismissed on grounds that his confinement has been harsh and has amounted to enough punishment -- came a day after Manning testified that he had considered suicide while in custody. +The Army intelligence analyst, arrested in June 2010, is accused of stealing thousands of classified documents while serving in Iraq. +The material was then published online by WikiLeaks. +WikiLeaks has never confirmed that Manning was the source of its information. +In Friday's hearing, Fein reviewed with Manning the forms that officers filled out after meeting with Manning during his detention at Quantico's brig, where he was held under a heightened confinement status from July 2010 to April 2011. +Officers would ask Manning questions and write down his responses. 
+When Fein asked about the forms Friday, Manning acknowledged that he rated treatment by his guards as "excellent" and treatment by the facility overall as "very professional." +The forms show no complaints of mistreatment, even though the officers asked Manning directly about his treatment, Fein contended. +Manning responded that he would verbally express concern about issues and that the visiting officers would talk through the concerns and indicate that they would be addressed, but they didn't record the issues. +"They would write down 'no issues' (after discussing the concerns), and it didn't necessarily mean I didn't bring something up," Manning said. +The judge, Army Col. Denise Lind, also asked Manning why he didn't complain about his treatment during a January 2011 meeting with a board examining the suicidal thoughts he expressed in a form months earlier. +Manning replied that his intention during that meeting was to get his "prevention of injury" status downgraded. +The military said they put him on this restrictive status -- a step below suicide watch -- for his protection and the safety of others. +"I wanted staff to know I was fine, and (I wanted to) get off the POI status ... to enjoy an increased quality of life from my viewpoint," Manning said. +Manning testified Thursday about his arrest in Iraq and his transfer to Kuwait, where he was held for nearly two months before being transferred to the brig at Marine Base Quantico in Virginia in July 2010. +He said he contemplated suicide in Kuwait and once passed out there due to the heat. +He said not being allowed to know what was happening to him or in the outside world was distressing. +"My world just shrank to Camp Arafjon, to that cage," Manning said Thursday. +I thought I was going to die in that cage. +Once at Quantico, Manning said, he spent most days in a small cell -- at least 21 hours and often more than 23 hours -- with no company. +Manning said he was allowed only a mattress, blanket, flip-flops, some clothes and his glasses. +He said he tried to keep moving, because sleeping during the day or even lying down was against the rules. +Manning said he always slept with light from outside his cell in his eyes. +If guards could not see his face when he rolled over at night, he said they would wake him to roll back over. +Manning's lawyer filed a formal objection to Manning's treatment in January 2011. +Manning was moved to the military prison at Fort Leavenworth, Kansas, in April 2011. +Also Friday, the judge asked Manning about an allegation that he made in Thursday's testimony -- that after being forced to sleep naked one night in his Quantico cell, he was forced to stand naked in front of guards and other inmates during a morning head count. +Manning had testified that he was never given a chance to cover himself with his blanket during the head count. +Under questioning from the judge Friday, Manning said that he inferred from his guard's order that he should drop a blanket that could have covered him, but he acknowledged that no one had ordered him to drop it. +Manning testified Thursday that he was forced to sleep naked the previous night because of his attempt to show an officer that he wasn't a danger to himself. +Manning said that he told the officer that he could have used the waistband of his underwear or his flip-flops to hurt himself but hadn't done so. +That night, Manning testified, his underwear, flip-flops and glasses were removed from his cell.
+His lawyers hope the judge will at least take his experiences during confinement into account and sharply reduce his sentence should he be convicted at his court-martial, which is expected to begin early next year. +The defense has said it plans to have Manning plead guilty to lesser offenses and fight other charges as being too extreme. +The hearing is scheduled to resume this weekend, with prosecutors expected to argue that the detention conditions were warranted. +The Pentagon has maintained that Manning was held in accordance with rules governing all maximum-custody detainees at Quantico. +Counts against Manning include aiding the enemy, wrongfully causing intelligence to be published on the Internet, transmitting national defense information and theft of public property or records. +If he's convicted on all counts, he could face a life sentence. +My Mexican-American identity crisis +He says many were forced to leave Mexico because of the lack of opportunities there +Mexicans tend to fault those who left; they remind Mexicans of hard times, he says +Navarrette says Mexican-Americans are caught between two worlds +On a recent trip to Mexico City, I had barely made my way down the concourse and arrived at the immigration processing area when I got stumped. +Signs pointed the way to two lines: one for "Mexicanos" ("Mexicans"), another for "Extranjeros" ("Foreigners.") +I stood there for a few seconds, unsure of where to go. +Growing up in Central California, I had been called a "Mexican" my entire life. +It's ethnic shorthand in the same way that my friends in Boston refer to themselves as "Irish" or my friends in New York describe themselves as "Italian." +Later, I settled on "Mexican-American." +But, this was Mexico. +And, in the homeland of my grandfather, there was no need for shorthand or hyphens. +I was simply an American. +I speak Spanish, good enough to handle either end of an interview in that language. +But I don't have the vocabulary of a native, and I can't shake my American accent. +So I took my U.S. passport and got in the line for Extranjeros. +I thought about that moment this week when Mexican president-elect Enrique Pena Nieto visited the White House to meet with President Obama. +On the agenda, as usual, when the leaders of these two countries meet: immigration, drugs and trade. +Pena Nieto was also eager to talk about the growth of the Mexican economy, which is one reason that Mexicans are now just as likely to stay in Mexico as venture to the United States. +He wants to partner with the United States and Canada, and create a European Union-style trading bloc in North America. +And Pena Nieto vowed to continue Mexico's war against the drug cartels, even though he offered no specifics. +For Mexico, the relationship with the United States is complicated and filled with hard feelings. +Most Americans probably never give a thought to the fact that, in 1848, the United States invaded Mexico and forced its leaders to sign over half their territory at the point of a rifle. +But for Mexicans, who think in terms of centuries, not minutes, the reminders are everywhere. +So the minute that a U.S. official says anything the least bit critical of Mexico, you start hearing -- in the Mexican press, and among the elites -- complaints about how the Americans are encroaching upon their neighbor's sovereignty. +And the children of Montezuma go on the warpath.
+And yet, for Mexico, the really challenging relationship is with the more than 35 million Mexican-Americans living in the United States. +You want to talk about hard feelings? +There is plenty. +Mexico has winners and losers, people for whom the country provides opportunities and others for whom it doesn't. +The only reason you have so many people of Mexican ancestry living in cities like Los Angeles, Las Vegas, Phoenix, Denver or San Antonio is because, at some point in our family tree, there was a person, maybe a parent or grandparent, who was shut out from opportunity in Mexico and had to go north. +And more often than not, that person fit a profile -- dark skin, little education, from a poor village, etc. +We're their offspring, and we're loyal to them. +Not Mexico. +And even though we may now be living the American Dream, having gone to good schools and taken good jobs, we can never lose sight of the fact that it's the American Dream we're living, and not the Mexican one. +Our identity might sometimes be fuzzy, but our loyalty is clear. +It's to the United States. +Besides, we're aware that many of the elite Mexicans in the ruling class don't like us. +The feeling is mutual. +They see us as a reminder of a humiliating defeat and look down on us as inferior stock that isn't sufficiently Mexican. +Our Spanish will never be good enough, our ties to Mexico never strong enough. +Our existence is, as they see it, all about failure. +If our families hadn't failed in Mexico, they wouldn't have left. +And we wouldn't now find ourselves trapped behind the silk curtain, living well in the United States but lost souls nonetheless. +My wife, who was born in Guadalajara and came to the United States legally as a child, reminds me that there is friction between Mexicans and Mexican-Americans because Mexicans have a firmer grasp of who they are and Mexican-Americans resent that. +While she's a U.S. citizen, she sees herself as a part of two countries. +Meanwhile, many Mexican-Americans I know don't feel like they're a part of either. +We love listening to the Mexican band, Los Tigres del Norte, but also to Bruce Springsteen. +You get the best of both worlds, but you're rooted in neither. +In Mexico, we're seen as Americans. +And in the United States, we're considered Mexican. +Now, to complicate the relationship even further, as I learned during my trip, some Mexican leaders and parts of the intelligentsia want to reconnect with the Diaspora. +They want to put Mexican-Americans to work as makeshift "ambassadors" for Mexico, representing its interest in the United States. +We would tell our fellow Americans what a great country this is to visit and pressure political leaders to strengthen ties with Mexico. +Yeah. +That's not going to happen. +Too many hard feelings. +And, with income inequality and rampant corruption and drug violence, many of us are not so sure that it is a great country. +I'm afraid you're on your own, amigos. +That's fair. +If at least some Mexicans aren't yet ready to forgive the United States for how it treated Mexico a century and a half ago, then they have to accept the fact that some Mexican-Americans still hold a grudge for how their family members were treated much more recently than that. +Hmmm. +Maybe we're more "Mexican" than I thought. +Old battles, new Middle East +The ceasefire between Israel and Hamas could yet be an unlikely foundation for peace +Can there ever be a lasting peace between Arabs and Jews in the Middle East? 
+Another round of bloodshed suggests that any such hope is vain. +Amid the usual futile arguments over who started it, scores of buildings have been reduced to rubble; more than 140 Palestinians, most of them civilians, and six Israelis have been killed; and, for the first time, missiles from Gaza have landed near Tel Aviv, Israel's metropolis, and the holy city of Jerusalem. +But though the Israelis and Palestinians seem stuck in their ancient conflict, all around them the Middle East is changing. +The Arab spring has thrown the pieces up in the air, and, like it or not, the Palestinians and Israelis are caught up in the regional turmoil. +Maybe this will make their struggle bloodier than before. +However, there are reasons for thinking it could just break their lethal stalemate. +A war that is neither lost or won +At first sight, optimism looks very hard to justify now. +Even if the ceasefire agreed on November 21st holds, this week's fighting has strengthened the hawks on both sides. +The leaders of Hamas, the Islamist movement that has ruled Gaza since 2007, will claim to have forced the Israelis to back off, even though Gaza has taken a drubbing. +Despite killing some of its leaders and bottling up Gaza's 1.7m people in one of the most wretched and crowded corners of the planet, Israel has failed to destroy Hamas. +Indeed Hamas is gaining on the West Bank, the other bit of Palestine currently run by its bitter rivals in Fatah, the more moderate Palestinian faction. +Moreover, Hamas's leaders may well conclude that time is on their side. +As Islamists across the Arab world have gained clout, so Hamas has made powerful and rich friends. +Turkey, a resurgent regional power that was once Israel's closest Muslim ally, has taken up Hamas's cause; so has Qatar, one of the richest and most dynamic of the Gulf states. +Jubilant Hamas people say an Islamist crescent is curving around Israel, from Lebanon in the north, where the Hizbullah party-cum-militia holds sway, through Syria, where rebels of an increasingly Islamist bent may topple Bashar Assad, and on down through Jordan, where Hamas's allies are menacing the king. +Above all, on Israel's southern flank, the rise of the Muslim Brotherhood under President Muhammad Morsi in Egypt, by far the most populous and pivotal of Arab countries, has changed the region's balance. +Hosni Mubarak, the secular despot who ran Egypt for 30 years until his downfall in 2011, had little time for Hamas. +By contrast, the Brotherhood is a cousin of Hamas, and its leaders are more subject to popular opinion. +In future diplomacy Hamas may emerge as an actor that cannot be shut out even by Israel and America. +Meanwhile, Israel's hardliners will draw the opposite conclusions. +In military terms, Hamas has been put back in its box. +Israel's Iron Dome anti-missile system has proved its worth and many of Hamas's missiles have been destroyed. +Israelis will sleep more soundly - for a while. +In diplomatic terms, America is as steadfast as ever; many European countries also blamed Hamas for starting the latest round of violence. +Above all, Israel has prospered, especially under Binyamin Netanyahu, a prime minister who has largely ignored the peace process. 
+Although rockets from Gaza have killed around 30 Israelis since 2004, Israel has been fairly free of suicide-bombers, thanks in part to the barrier that bites into the West Bank, the main chunk of a would-be Palestinian state, and protects the Jewish settlements that continue to expand despite their illegality in international law. +Mr Netanyahu, whose Likud party has merged with an even more hawkish lot under Avigdor Lieberman in the run-up to an election on January 22nd, is sitting pretty. +Why coddle those twisty Palestinians by giving them a state of their own? +If they really ran the West Bank, would they not fire rockets, just as their compatriots have done in Gaza? +Better to keep them behind that wall and smite them if they raise their heads. +Maybe the hardliners will win out; yet the Arab spring may change their calculations. +Even if the Islamists taking power in Egypt and elsewhere have little love for Israel, their priority will be tackling difficulties at home. +Israel's defence budget is bigger than that of its four Arab neighbours combined. +Starting a war with the local superpower will hardly help the new Arab governments mend their economies. +That the pragmatic Mr Morsi worked with Barack Obama to obtain a ceasefire augurs well - and might just mark the start of something. +Israelis too should look to the longer term. +With the rest of the Arab world becoming more democratic, depriving Palestinians of their right to self-determination is creating a powder keg that is bound one day to explode in the territories occupied by Israel - much as a bus exploded in Tel Aviv this week. +Repression is already undermining democracy in the Jewish state, and demography exacerbates this as the Arab population swells. +Bloody missions against Gaza every few years to knock back Hamas will exact a growing diplomatic toll. +Both sides need prodding by outsiders +The answer remains the one trumpeted by sensible people on both sides, most of the outside world and this newspaper: two states, with Israel ceding territory for security. +The hope - a small one in the short term - is that the ceasefire will give a little more leverage to outsiders pushing that cause. +Egypt, which must now set about stopping the flow of arms into Gaza, along with Turkey and Qatar, is better placed than ever to persuade Hamas to accept the idea of a Jewish state based on the 1967 boundaries with land swaps and a shared Jerusalem. +Arab outsiders should also press Hamas and Fatah to come together. +That would do more to create a Palestinian state than the imminent bid for virtual statehood at the UN. +Mr Obama also has a part in getting Israel to the table. +During his first term, he neglected to present his own plan for peace. +Back in the White House, he is looking just as reluctant to be drawn in. +This is woefully short-sighted. +America has a vital interest in a stable Middle East. +That means a peace settlement between Israel and the Palestinians. +Cigarette plain packaging laws come into force in Australia +Smoking warnings and diseased body parts emblazoned on dull green boxes that are the same for all tobacco brands +Australia's world-first laws on cigarette and tobacco plain packaging have come into force, replacing brand logos and colours with generic drab olive green coverings, gruesome pictures of diseased body parts and depictions of children and babies made ill by their parents' smoking. 
+Apart from the varying health warnings and images the only difference between the packs, mandatory from Saturday, are the brand names, and these are all printed in identical small font. +It is the world's most strict regime for the packaging of tobacco. +Australia's federal government says the aim is to deter young people from smoking by stripping the habit of glamour. +It is relying on studies showing that if people have not started smoking by age 26 there is a 99% chance they will never take it up. +"Even from a very early age you can see that kids understand the message that the tobacco company is trying to sell through their branding," said the federal health minister, Tanya Plibersek, citing studies that showed, for example, children linking a crown in a logo with the idea of being a princess. +While Australia has one of the world's lowest smoking rates and the changes will have little impact on multinationals' profits, other countries are considering similar steps. +The tobacco industry lobbied hard against the laws. +Tobacco firms said they would boost black market trade, leading to cheaper, more accessible cigarettes. +"There will be serious unintended consequences from the legislation," said Scott McIntyre of British American Tobacco Australia. +Counterfeiters from China and Indonesia will bring lots more of these products down to sell on the streets of Australia. +Others say the laws have boosted their business. +Sandra Ha of Zico Import Pty Ltd, a small family business, said demand for cigarette cases, silicon covers to mask the unpalatable packages, had shot up from almost nothing two months ago since British American Tobacco, Britain's Imperial Tobacco, Philip Morris and Japan Tobacco lost a challenge to the laws in Australia's high court. +Ha said Zico had sold up to 6,000 to wholesale outlets and was awaiting new stock. +This is good business for us. +The potential hitch, experts say, is the popularity of social media with the very demographic the plan is targeting. +After a series of Australian laws banning TV advertising and sports sponsorship and requiring most sellers to hide cigarettes from view, tobacco marketing has moved online. +Australia has banned web advertising by local companies and sites but cannot restrict overseas sites. +"If you are a tobacco marketer and you've only got this small window left to promote your products, online is the compelling place for you to be in," said Becky Freeman, a public health researcher at Sydney University. +Freeman noted an increase in "average Joe" reviews of brands on social media sites such as YouTube, Twitter and Facebook. +We have to ask, is that just a private citizen who really loves Marlboro cigarettes and they've gone to the trouble of making a video, or is there a marketing company involved? +British American Tobacco Australia said the industry was focused on dealing with the new rules rather than marketing. +The industry has gone as far as paying for Ukraine, Honduras and the Dominican Republic to challenge the new rules - the countries are claiming at the World Trade Organisation that trade is being unfairly restricted, despite none of the countries having significant trade with Australia. +A WTO ruling is likely in mid-2013. +Plibersek said the government had held discussions with other countries considering similar laws on packaging. +Canada was the first country to make photograph warnings mandatory in 2001. +They now extend to more than 40 countries including Brazil, Turkey and Ukraine. 
+Tougher laws are being considered in Britain, New Zealand, South Africa and India. +Many smokers in Australia remain defiant. +The pictures don't affect me. +I just ignore them. +"You just grab a smoke and put it away," said Victor El Hage as he purchased a pack with a photograph of a mouth tumour. +Honestly, there's only one reason I'd stop, and that's my little girl. +James Yu, who runs the King of the Pack tobacconist in central Sydney, said the uniform packaging made it harder to stack his shelves +"It used to take me an hour to unload a delivery, now it takes me four hours," Yu said. +"The government should have just banned them altogether and then we'd go OK, fine, we're done, we'll shut up shop," he said, throwing his hands up in the air. +In a Constantly Plugged-In World, It's Not All Bad to Be Bored +I spent five unexpected hours in an airport this Thanksgiving holiday when our plane had mechanical difficulties and we had to wait for another plane to arrive. +So I had plenty of time to think about the subject of boredom. +I won't lie to you. +Half a day in an airport waiting for a flight is pretty tedious, even with the distractions of books, magazines and iPhones (not to mention duty-free shopping). +But increasingly, some academics and child development experts are coming out in praise of boredom. +It's all right for us - and our children - to be bored on occasion, they say. +It forces the brain to go on interesting tangents, perhaps fostering creativity. +And because most of us are almost consistently plugged into one screen or another these days, we don't experience the benefits of boredom. +So should we embrace boredom? +Yes. +And no. +But I'll get back to that. +First of all, like many people, I assumed that boredom was a relatively recent phenomenon, with the advent of more leisure time. +Not so, says Peter Toohey, a professor of Greek and Roman history at the University of Calgary in Canada and the author of "Boredom: A Lively History" (Yale University Press, 2011). +"Boredom actually has a very long history," he said. +There's Latin graffiti about boredom on the walls of Pompeii dating from the first century. +Then there's the question of how we define boredom. +The trouble is that it has been defined, and discussed, in many different ways, said John D. Eastwood, an associate professor of psychology at York University in Ontario, Canada. +After looking over the research literature and putting the idea in front of a focus group of about 100 people, Professor Eastwood and his colleagues defined boredom as an experience of "wanting to, but being unable to engage in satisfying activity." +What separates boredom from apathy, he said, is that the person is not engaged but wants to be. +With apathy, he said, there is no urge to do something. +The core experience of boredom, he said, is "disruption of the attention process, associated with a low mood and a sense that time is passing slowly." +Boredom can sound an awful lot like depression. +But Professor Eastwood said that while they can be related, people who are bored tend to see the problem as the environment or the world, while people who are depressed see the problem as themselves. +Sometimes we think we're bored when we just have difficulty concentrating. 
+In their study, "The Unengaged Mind: Defining Boredom in Terms of Attention," which appeared in the journal Perspectives on Psychological Science in September, Professor Eastwood and his colleagues pointed to an earlier experiment in which participants listened to a tape of a person reading a magazine article. +Some groups heard a loud and unrelated television program in the next room, others heard it at a low level so it was barely noticeable, while the third group didn't hear the soundtrack at all. +The ones who heard the low-level TV reported more boredom than the other two groups - they had difficulty concentrating but were not sure why, and attributed that difficulty to boredom. +When you're trying to focus on a difficult or engaging task, disruption of attention can lead to boredom, said Mark J. Fenske, an associate professor of neuroscience at the University of Guelph in Ontario and one of the authors of the study. +On the other hand, when you're doing something dull, "such as looking for bad widgets on a factory line, distracting music can help you not be bored." +In fact, he said, we now know that squirming and doodling, often seen as a sign of boredom, can actually help combat it by keeping people more physically alert. +"Research shows that kids who are allowed to fidget learn more and retain more information than those who are forced to sit still," Professor Fenske said. +We all experience boredom at some points - my flight delay, a droning speaker, a particularly tedious movie. +But some individuals are more likely to be bored than others. +To help measure this, researchers developed a "Boredom Proneness Scale" in the 1980s. +The scale includes questions like, "Many things I have to do are repetitive and monotonous," and "I have so many interests, I don't have time to do everything." +Using such scales, researchers have discovered that boys tend to be bored more often than girls, said Stephen Vodanovich, a professor of psychology at the University of West Florida, especially when it comes to needing more, and a variety of, external stimulation. +But in general, teenagers are a pretty jaded lot. +In 1991, Reed Larson, a professor of human and community development at the University of Illinois, conducted an experiment in which he contacted almost 400 teenagers and their parents seven to eight times a day by beeper. +He found that 32 percent of adolescents said they were bored in school and doing homework, while 23 percent said they were bored when they weren't in school. +On the other hand, 3 percent of parents said they were bored. +Professor Larson said he did not know whether the boredom percentages now, 21 years later, would be higher or lower. +But he said he did know that "adolescence is a peak period for boredom," largely because children and teenagers are not given a lot of control over what they want to do. +So back to my original question: Is boredom good for you? +Sometimes no, because in its extreme it can lead people to take absurd physical risks, gamble or indulge in substance abuse as a way to ease it, research shows. +On the other hand, many philosophers and writers discuss the connection between boredom and creativity, said Professor Vodanovich, who has been studying the issue for more than two decades. +"Boredom is the brain's way to tell you you should be doing something else," said Gary Marcus, a professor of psychology at N.Y.U. +But the brain doesn't always know the most appropriate thing to do.
+If you're bored and use that energy to play guitar and cook, it will make you happy. +But if you watch TV, it may make you happy in the short term, but not in the long term. +So if your child is bored and you give him an iPad, he may not be bored anymore, but he hasn't learned how to entertain himself, or self regulate, Professor Fenske said. +And "that self-regulation transfers from one situation to other," he said. +Your kid doesn't just learn to entertain himself, but gets more self-control in other areas. +I don't think we really want to celebrate boredom. +Nor should we be too critical of it. +Rather, our goal should be to feel comfortable away from the constant chatter of activity and technology. +Professor Eastwood agreed. +"We frame it as we need to be bored more, but boredom is an agonizing, restless desire to be connected with something meaningful," he said. +What people are really searching for, he said, is a way to unplug and enjoy down time. +"In an environment where we are constantly overstimulated," he said, "it's hard to find ways to engage when the noise shuts down." +In Colorado, No Playbook for New Marijuana Law +Anthony Orozco, 19, a community college student and soccer player in southeastern Colorado, is facing criminal charges for something that will soon be legal across this state: the possession of a few nuggets of marijuana and a pipe he used to smoke it. +Mr. Orozco said that one day in September he and a few friends were driving in Lamar, on the plains near the Kansas border, when they were pulled over. +After the police officer found marijuana in the car, Mr. Orozco was issued a summons for possession and drug paraphernalia - petty offenses that each carry a $100 fine - and given a court date. +"We get treated like criminals," Mr. Orozco said. +But is he one? +In the uncertain weeks after Colorado's vote to legalize small amounts of marijuana for recreational use, the answer in hundreds of minor drug cases depends less on the law than on location. +Hundreds of misdemeanor marijuana cases are already being dropped here and in Washington State, which approved a similar measure. +Police departments have stopped charging adults 21 years and older for small-scale possession that will be legally sanctioned once the laws take effect in the coming weeks. +But prosecutors in more conservative precincts in Colorado have vowed to press ahead with existing marijuana cases and are still citing people for possession. +At the same time, several towns from the Denver suburbs to the Western mountains are voting to block new, state-licensed retail marijuana shops from opening in their communities. +"This thing is evolving so quickly that I don't know what's going to happen next," said Daniel J. Oates, the police chief in Aurora, just east of Denver. +Regulators in Washington State are also scratching their heads. +And they are looking for guidance on how to set up a system of licenses for production, manufacturing, distribution and sales - all by a deadline of Dec. 1, 2013. +They say that Colorado, for better or worse, is ahead of most states in regulating marijuana, first for medical use and now recreationally. +"Colorado has a more regulated market, so they will be a good guide," said Brian E. Smith, a spokesman for the Washington State Liquor Control Board. +But no place or system, Mr. Smith conceded, can do more than suggest what might work. +"There's no real precedent for us to follow," he said. +Washington's law, called I-502, takes effect on Dec. 
6, which also leaves a year of limbo during which the state licensing system will not yet exist, but legalized possession will. +And there are thorny mechanical questions that must be resolved during that time, like how to balance the state's mandate of "adequate access" to licensed marijuana with its prohibitions on cannabis businesses within 1,000 feet of a school, park, playground or child care center. +"Nowhere will it be more difficult to site a licensed cannabis business than in urban areas, particularly in the Seattle metropolitan area," said Ben Livingston, a spokesman for the Center for Legal Cannabis, a recently formed research group. +On Nov. 21, Chief Oates in Aurora sent his officers an e-mail announcing that the city attorney would no longer be prosecuting small marijuana violations for anyone 21 years or older, and that the police would stop charging people for those crimes "effective immediately." +Chief Oates said that the police would enforce city codes regulating medical marijuana growers, and that they would still pursue drug traffickers and dealers. +In northern Colorado's Weld County, the district attorney, Ken Buck, represents a stricter view. +After the vote, he said his office would continue pursuing marijuana possession cases, mostly as a way to press users into getting treatment. +Right now, 119 people face charges of possessing two ounces or less of marijuana, though many are facing other charges. +"Our office has an obligation to prosecute offenses that were crimes at the time they occurred," Mr. Buck said in a statement. +The response has been complicated even in places like rural Mesa County, where voters rejected the marijuana initiative. +The police in Grand Junction, the county's largest city, are no longer citing adults for possession of small amounts. +The county's district attorney, Pete Hautzinger, supported that decision, but also decided not to dismiss all of the pending possession cases. +"I do not think I'm wasting my time continuing to enforce the law until it changes," he said. +Although 55 percent of Colorado voters supported the measure, bringing recreational marijuana into the folds of government and the legal system was never going to be simple. +And the contradictory reactions across the state lay bare a deep ambivalence among local officials about the state's big green experiment. +"It's a cultural barrier" with district attorneys, said Sean McAllister, a Denver lawyer who represents marijuana defendants and is a local spokesman for the National Organization for the Reform of Marijuana Laws. +"They spent so much of their lives prosecuting people that they still don't really accept that this is legal," he said. +As the first states to treat small amounts of marijuana like alcohol, Colorado and Washington are poised to become national test cases for drug legalization. +As advocates and state officials plan for a new frontier of legalized sales, they are also anxiously awaiting direction from the federal government, which still plans to treat the sale and cultivation of marijuana as federal crimes. +Advocates for legalized marijuana are hoping the Justice Department yields. +Despite some high-profile arrests of medical marijuana patients and sellers, the federal government has mostly allowed medical marijuana businesses to operate in Colorado, Washington and 16 other states. 
+While drug agents will probably not beat down doors to seize a small bag of the drug, they are likely to balk at allowing the state-regulated recreational marijuana shops allowed under the new laws, said Kevin A. Sabet, a former drug policy adviser in the Obama administration. +Several cities in Colorado are not waiting for federal authorities to act. +Even before Election Day, some local governments approved moratoriums on any new marijuana shops, even though it will be about a year before any can open. +Last week, the western city of Montrose took up a six-month ban, and is likely to pass it next week. +"We don't want to be put in a position where we license somebody and then have a big federal issue," said Bob Nicholson, a City Council member. +Our community voted against this amendment. +We're looking at what the community voted for versus what the state voted for. +There's an awful lot of questions. +Petronella Wyatt: I was bullied out of Oxford for being a Tory +It is not just today's university students who are attacked for their views +I can't remember a time when I didn't dream of winning a place at Oxford University. +Both my father and my elder brother had been at what I imagined was the world's greatest seat of learning, a modern-day wine-blushed Greek symposium encouraging the dual pillars of civilisation, free thinking and tolerance. +Yet, within two weeks of taking up my place at Worcester College in the late Eighties to read history, I'd packed my bags, precipitating the first scandal of my life. +My father broke down and cried. +Friends were baffled. +The Evening Standard diary claimed I'd quit because I objected to fellow undergraduates having sex in the room next to mine. +The writer A N Wilson announced waggishly that I'd departed because I was forced to drink out of chipped mugs. +The truth was less droll. +I ran away. +Yes, ran, because I had been subject to systematic bullying and intimidation. +Not on account of my rather outré name, or the fact that I came from a private school. +I was persecuted for one reason only, and in this cradle of supposed enlightenment it was both bigoted and barbaric: my father, the late Woodrow Wyatt, was a high-profile adviser to Margaret Thatcher and I was a Conservative supporter. +Why bring this up now, you might ask. +Well, recent reports suggest that a new generation of Right-of-centre students are suffering a similar persecution. +Such is the institutionalised and increasing hatred of Tory students at Oxford that last week a group of them demanded the same equal-rights protection as gays, disabled people and ethnic minorities. +Conservative members of Corpus Christi College's junior common room (JCR) claim they are "often actively isolated, personally attacked and made to feel unwelcome" because of their political views. +They want to create a post on the college's equal opportunities committee to ensure that their opinions can be aired freely. +Their situation wasn't helped by a recent BBC Two documentary, Wonderland: Young, Bright and on the Right, about student politics, which portrayed Tories as oddballs and neo-Nazis. +It featured graduate Joe Cooke, former president of the Oxford University Conservative Association (OUCA), travelling in a Rolls-Royce, sporting a silver suit and silver-topped cane. +At other universities, Conservative students say they are being treated as "scapegoats" for the introduction of higher tuition fees.
+Luke Black, 20, vice-president of Nottingham University Conservative Association, told a Sunday newspaper that "there is a growing Left-wing bias at universities. +People assume we are like the Bullingdon Club without meeting us." +Samuel Roberts, 21, a history student at Corpus Christi, who proposed the motion for greater protection, says such a climate is "uncomfortable," while Stephanie Cherill, 19, president elect of OUCA, says there has been a deterioration in the attitude of JCR members towards people who are Right of centre. +"This poses a threat to the atmosphere of intellectual discussion, as well as to the welfare of members," she says. +I was in a minority of one during my first few weeks at Oxford. +I had gone up in September 1986, a cripplingly shy 18-year-old. +Hatred of the Conservative Party was at its most febrile. +The year before, the university had voted to refuse Margaret Thatcher - a former student - an honorary degree, because of cuts in higher education funding. +The atmosphere would have made a Stalinist shudder with apprehension. +During the first few days of freshers' week, when new students socialise with each other and the dons, I had a taste of the wormwood that was to come. +I was to find that the dons not only connived in the taunting of Tory undergraduates but took part with relish. +The politics of the miners' strike, privatisation and the government's opposition to sanctions against apartheid South Africa were brought into the wood-panelled rooms of the tutorial. +My first one involved translating 18th-century French texts into English, and I was unprepared for what followed. +"Miss Wyatt," said the don, Harry Pitt (now deceased), "please translate the first paragraph." +I stumbled over it. +A small man with a face like cake batter, Pitt was big on bile. +"Do Thatcherites refuse to learn French or are they just stupid?" he demanded. +The other undergraduates giggled. +Tears pricked the back of my eyes. +"I suggest you take some basic French lessons in your spare time - that is, if you're not too busy socialising," Pitt snarled. +I walked back to my rooms a disconsolate figure. +At dinner in college that evening I sat by myself; then I felt a light tap on my shoulder. +It was a second-year English student named James who introduced himself as a member of the OUCA. +"I know who you are," he said kindly. +I'm afraid it's like that. +Anyone suspected of being a Tory is picked on. +It's bad enough for me, but they know your father is close to Margaret Thatcher, so it will be worse for you. +Most Tory freshers pretend they're Labour. +Later, at a local pub, I cravenly attempted to dissimulate. +I insisted that I didn't agree with everything Mrs Thatcher said. +This ploy proved unsuccessful. +A first year PPE student, who, ironically, had been to Eton, said: "You're the daughter of a fascist pig." +You're contaminated. +Other students took up the refrain. +I was perverted, dirty. +"How do Tories have sex?" one asked. +They beat each other, don't they? +I felt the way homosexuals must have felt before the liberal legislation of the Sixties. +Would I ever be able to lead a normal life at Oxford? +Would I be forced to meet like-minded people only after dark? +Would I have to turn to Labour and suppress my natural inclinations? +The three years before me stretched out as a purgatory of ostracism and isolation. +The only openly Tory don was Norman Stone, Professor of Modern History, who was based at my college.
+He was hated for being not only a Conservative but a foreign policy adviser to Thatcher and one of her speech writers. +He was hardly ever there. +He loathed the place as provincial and petty, and for its adherence to the Marxist-determinist view of history. +In 1997 he took up a professorship at the University of Bilkent, in Ankara, Turkey. +"You won't be happy here," he told me. +I began commuting from Oxford to my parents' house in London, finding refuge with my more open-minded metropolitan friends and family. +I told my father I hated Oxford and why. +He was incredulous. +During his time there in the Forties, all political views had been accepted. +"But it's the best place in the world," he said pathetically. +They wouldn't do that, not among my dreaming spires. +Even my Communist friends always had impeccable manners. +His rheumy eyes began to cloud. +Give it a chance. +I'm sure it's all just a tease. +It would break my heart if you left. +Exhausted by my frequent trips to London, my emotional resistance was deteriorating. +A male friend of mine, also a Tory supporter, had succumbed to pressure and renounced his creed. +During a tutorial the following week, when another history don had suggested, in complete seriousness, that I was an "enemy of the people," I decided to do the same. +Inwardly blushing with shame, I admitted to being "brainwashed by my parents" and called them "old fools." +The respite was short. +It was my father who drove the nail into the coffin of my Oxford career. +At the time, he wrote two columns in the Murdoch press each week. +My door was locked. +I cowered inside, and after five minutes, my pursuers gave up. +When they left, I packed a suitcase and caught the first train to London. +I never went back. +You may call me a snivelling wimp. +But no 18-year-old should be subject to such intimidation and vitriol in an educational institution. +Even more tragic is that it was Oxford, which not only produced 14 Tory prime ministers, but, to this day, hides behind an ill-deserved reputation for equality and freedom of thought. +"Valentino prefers elegance to notoriety" +On the occasion of the "Valentino: Master of Couture," an exhibition that opened this week in London, ABC speaks with Naty Abascal, Fiona Thyssen-Bornemisza and others of the Italian designer's famous clients. +Somerset House, former home of Queen Elizabeth I of England, is the only place in the British capital worthy of hosting a Valentino Garavani exhibition. +During the inauguration of "Valentino: Master of Couture," the designer acknowledged a retrospective apotheosis that brings together over 130 couture gowns created by his fashion house over the past 50 years. +"I love this palace" he says, in his unmistakable Italian accent. +This exhibition is the culmination of a story whose only protagonist is "signore" Garavani, although it could not have been written without his distinguished clients. +Valentino has always been fascinated by the rarefied and distant world of the nobility. +In the first room of the exhibition, open until March 3, there are a number of private letters and photos signed by the cream of aristocracy, from Princess Salimah Aga Khan, Lord Snowdon, Princess Marie-Chantal of Greece to Margaret of England. +Valentino exhibits these personal memories as if they were trophies of his social ascent from humble couturier in Voghera, northern Italy, to idol of the international jet-set. +There is nothing wrong with loving royalty.
+"At least they don't drop cigarette ends on your magnificent carpet, like some beautiful pop music celebrities do," says Baroness Fiona Thyssen-Bornemisza. +In the '60s and '70s, we both lived in the Alps and were good friends. +Valentino is a spectacular host whose entertains with generosity and elegance. +"We all loved being invited to his chalet in Gstaad" says "Heini" Thyssen's ex-wife, a close friend of forgotten beauties such as Marella Agnelli and Eugenie Niarchos. +Valentino has always preferred elegance to notoriety. +And yet, he is a star. +Valeria Mazza, wearing a Valentino. +The Argentine model Valeria Mazza also recalls the couturier's charisma. +Many years ago, after a fashion show in Piazza di Spagna in Rome, we went for dinner at his flat. +There were twenty of us, including Sharon Stone and John Kennedy Jr. +You could see and feel his "spirit" in every detail of the flat and its decor, the food and the music. +"All the guests were made to feel important and loved" recalls the top model, who started working with him during Haute Couture Week Paris, in 1995. +"His designs are works of art and so never go out of fashion" she concludes. +Nobility parade +Garavani's life is not a story of obsession, but of well reciprocated love. +He loves well-educated people who come from good backgrounds, and they love him. +One of the Somerset House galleries has been transformed into a glamorous, sixty-foot long catwalk which offers a role reversal: visitors take the place of the models and have to parade down the catwalk while looking at a dream "audience" wearing Valentino masterpieces, for example, the dress Jackie Kennedy chose for her wedding with Aristotle Onassis, the costume Monica Vitti wore in "La Notte" and the wool and leather coat that belonged to Empress Farah Diba. +In this crowd of mannequins, names stand out such as Sibilla of Luxembourg, Gloria von Thurn und Taxis, Mette-Marit of Norway, Rosario of Bulgaria and Sofia of Habsburg. +Naty Abascal and the designer, in 2006 +Many of these clients say your first Valentino is like your first love, "impossible to forget." +I remember it perfectly. +It was a pair of trousers, a shirt, a "gilet" waistcoat and jacket from the 1971-1972 autumn-winter collection. +"It was a gift he gave me" says Naty Abascal, one of the designer's muses. +"I prefer him to other designers because of his femininity, his great love of women, and because he enhances our beauty" added the former Duchess of Feria. +I love the colours he uses, they really stand out and "lend themselves" to your face. +Their proportions are perfect. +The princess and fashion advisor Patricia della Giovampaola d'Arenberg also remembers the first time she wore a Valentino. +As a teenager living in Italy, I dreamed of reaching the age when I'd have the chance to wear one of his evening gowns... +My time finally came in the late '90s. +I bought my first Valentino dress to wear at a party in the castle belonging to my cousin, Prince Edouard de Ligne. +It was a red dress, with a frilly skirt, draped "corsage" and a strapless neckline. +"It was a dream come true" says Princess D'Arenberg, the widow of Rodrigo d'Arenberg. +"Valentino is indifferent to fashion, his obsession is timeless" says this Italian aristocrat who lives between Paris, New York and Buenos Aires. +Princess D'Arenberg looks after her couturier gowns with "the utmost care ... because a dress not just a dress, it's also the many memories that go with it." 
+The "king" of fashion +The "grand finale" of the Somerset House exhibition is Marie-Chantal Miller's wedding dress from her marriage to Paul of Greece in 1995. +It took four months' work and 25 "girls" (as the designer calls his seamstresses) to create the pearl-encrusted, ivory-coloured silk gown with twelve different types of lace and a train four and a half metres long. +According to journalist Suzy Menkes, the leading authority of the specialist press, that dress represents a high fashion milestone of the late 20th century, "the return of high society clients." +Dazzled for years with the "savoir-être" of the elite, Valentino is now its finest exponent. +Cavaliere di Gran Croce (the highest-ranking distinction in Italy), Cavaliere del Lavoro, Commandeur de L'Ordre des Arts et des Lettres, and awarded the Legion of Honour, Garavani accumulates as many honours as any of his clients' husbands. +"I've always been struck by his refined and calm manner, and his neat and perfect appearance" acknowledges D'Arenberg. +The last time I saw him was a month ago at a gala dinner at the Orsay Museum. +He was on the table of Countess Jacqueline de Ribes, a great friend of mine. +"He was immaculate, time stands still for him." +If a princess says that... +The hardest job in the world: the human mules of Kawah Ijen +For four euros, the Indonesian volcano porters risk life and limb carrying 70 kilos of sulphur along steep stone paths. +There are people for whom work is hell, and others who - literally - work in hell. +This is the case of Anto Wijaya, one of the 400 miners who make their living taking sulphur from the Kawah Ijen volcano, east of the Indonesian island of Java. +To do so, he has to descend every day to the bottom of the crater, where the sulphurous gas emanating from the bowels of the earth solidifies on contact with air. +After breaking off large sulphur rocks, which in total can weigh up to 70 kilos, he carries them in two bamboo baskets on his shoulders along the steep stone paths. +It is only 250 metres to the top of the volcano, which rises to 2,386 metres above sea level, but the exhausted porters take over 40 minutes to get there, at snail's pace, keeping their balance and measuring their steps carefully to avoid slipping and falling over the precipice. +They know that one slip could cost them their lives, as happened to a French tourist who plunged to her death a few years ago on the hazardous Kawah Ijen cliffs. +The Kawah Ijen miners are paid 5 euro cents for each kilo of sulphur removed. +Once at the top, they make their way past the tourists who photograph them like circus monkeys and then, lugging their heavy baskets, they walk three kilometres to the scales installed by a mining company a little further down, 1,850 metres above sea level. +This is PT Ngrimbi Candi, a company which, since 1960, has been exploiting the volcano, and quite literally its workers, whom it pays 662 rupees (5 euro cents) per kilo of sulphur. +It then sells the sulphur for 10,000 rupees (83 cents) to the petrochemical industry, as the mineral is widely used in everyday life and is used in the manufacture of matches, fireworks, cosmetics, dynamite and even for whitening sugar. +"We generally carry 70 kilos, so we get about 46,000 rupees (3.8 euros) a trip" explains Anto, who usually make three trips a day. +Each one takes three hours and you end up exhausted, but it means he gets 138,000 rupees (11.5 euros) at the end of the day. 
+Although it seems a pittance for such an inhuman effort, it is three times what he would earn in the field. +"Miners' wages are very high here, whereas coffee harvesting is paid 15,000 rupees (1.2 euros) a day and the average monthly wage is two million rupees (167 euros)" explains the porter, who previously worked as a mason in the island resort of Bali. +There, his wage was 75,000 rupees (6.2 euros) a day and the work was not as hard, but Anto has returned with his family to Banyuwangi, a village near the volcano, for a compelling reason which, in Indonesia, is as overriding as the sulphur: "I married a girl from Bali, where they are Hindu, and I've brought her to Java to convert to Islam." +Anto has asthma, he has difficulty breathing, coughs constantly and his eyes are irritated by the toxic gases. +At 27 years old, Anto has been risking his life for three years in the Kawah Ijen volcano, and the sulphur has already begun to take its toll on him, even though he covers his face with a special mask and goggles. +He has asthma, he has difficulty breathing, coughs constantly and his eyes are irritated by the toxic gases from the volcano. +This is the price you have to pay to realise your dreams. +"I'll go on working two more years because I want to open a shop or study Spanish or French" he vows in more than acceptable English. +Punished for life, this pleasant, intelligent young man could be a tour guide, waiter or hotel receptionist, but instead he does the work of a mule. +Sharing a filthy wooden hut with other porters, he gets up every day at two in the morning because the sulphur doesn't stop flowing at night, when its characteristic yellow colour turns blue and it glows in the dark. +Defying the shadows, Anto descends the crater and lights the path with a small torch attached to the helmet he bought with his money. +Some 400 porters carry sulphur baskets on their shoulders from the crater. +Despite their huge profits, the mining company has not mechanised the sulphur extraction process to save costs, nor has it provided any equipment for the porters, who work for themselves and by the kilo. +In fact, they do not even see any of the 30,000 rupee (2.5 euro) per camera surcharge that, on top of the 15,000 rupee (1.2 euro) entrance fee, the guards of this natural reserve charge to tourists who come to photograph the volcano and their human mules. +"This work is for animals, not people" protests Madrusin, a burly 42-year-old porter who has been working at Kawah Ijen for three decades, since leaving school. +He can lift up to 110 kilos, ensuring that he will go on working "all he can" because he needs the money to educate his three children, aged between 18 [months?] and 10 years old. +I won't retire, I'll die here because the volcano has been my whole life. +Although the sulphur burns your throat and stings your eyes when the wind suddenly changes and traps the miners in the thick columns of smoke coming out of the volcano, they are so hardy that no-one complains of serious illnesses... apart, of course, from their common respiratory problems, osteoarthritis, knee pain and sores on the shoulders, which have been misshapen by the weight of the baskets. +Balancing the basket on his back, Unainik can only carry 50 kilos now he is 53 years old. +Every day, he and his fellow workers break off 15 tonnes of sulphur from the volcano, which three lorries move to the warehouse in Tamansari, 18 kilometres away along a goat path that passes through scrubland.
+"I won't retire, I'll die here because the volcano has been my whole life" says Unainik, opening a mouth full of gaps where teeth use to be. +The oldest of his five children, 30 years old, also works carrying sulphur. +Time passes, but poverty perpetuates from generation to generation in one of the hardest jobs in the world: the one done by human mules in the Kawah Ijen volcano. +Singapore seeks babies to save its economy +Singaporeans blame their careers, stress and the cost of property and education for not having children. +"Singapore's population needs to grow." +I'm a patriotic husband, you're my patriotic wife, let's do our civic duty and create life! +It may seem unlikely that these verses are part of an advert for mint sweets, but in spite of this - or perhaps because of it - the video went viral on YouTube in Singapore earlier this year. +The phrases are part of a rap and make use of local references such as "Let's put a bao (bun) in the oven" to make fun of the birth rate in Singapore. +The advertising company that made the video, BBH, is hopeful that the advertisement will manage to focus attention to the problem in a fun way. +Its creative director, Douglas Hamilton, says he wanted to use the power of music to make people perform their "national duty." +It's purely an Internet thing, so we had to make it fun and amusing. +It's the biggest problem facing this country. +We are the world's worst at reproducing our own progeny, so we felt it was an issue we had to address. +We knew the Government had tried many things, like launching perfumes with pheromones or organising speed dating evenings. +Many of these ideas may have been creative, but they didn't necessarily work. +So we thought: why not be as creative as possible to solve the problem, by composing a rap? +1.2 children +But the Singapore Government is not taking it so lightly. +It spends USD 1,300 per year on policies to encourage people to have more children. +A government package for marriages and parents grants up to USD 15,000 per child, extends maternity leave and distributes tax benefits. +But this has all had little effect. +Singapore is a rich, high technology city State in Southeast Asia, also known for the conservatism of its leaders and its strict social controls. +The birth rate in Singapore, according to its national population division, currently stands at 1.2 children per woman. +The last time it was over 2, known as the replacement rate, was in 1976. +So why are Singaporeans not having children? +Tan Wei Ming, Director of Marriage and Family Policy of the National Population Division, said that it is a result of "better education" and "a wider range of career opportunities." +"This has given people a wider range of options in terms of life goals and priorities, beyond getting married and starting a family" he explains. +These changes in social norms have contributed to increasing numbers of people who are single, and delaying marriage and births, which has resulted in a decrease in the birth rate in Singapore. +Meanwhile, an EU immigration policy aimed at dramatically increasing immigration to cope with the population decline has created resentment among the local population. +In Singapore, there are websites where xenophobia against many new immigrants is widespread and thinly disguised, especially the Chinese who are criticised for keeping wages low and not integrating. 
+Increased immigration is also seen as one of the reasons why, last year, the Singapore ruling party experienced its worst election result since independence. +Since the election there has been an attempt to correct the problem, with the highest taxes and levies for foreign workers. +Unexpected consequences +While a fall in the birth rate has known effects on a nation's economic growth, tax revenues, healthcare costs and immigration policies, in Singapore's case there are also some unexpected consequences. +The Government is trying not to build so many small houses. +For example, it has started to influence the real estate sector. +Its urban development authority has started to control the number of small apartments, known as "shoe boxes," which can be built in certain areas of the city. +These apartments have a surface of 46 square metres and have been very successful in terms of sales. +However, there is concern that they may promote a single-living lifestyle and discourage developers who want to build large family houses. +But, Lim Yew Soon, managing director of the real estate company EL Developers, says his "shoe boxes" sell much faster than larger units. +They are more popular, in the sense that the units sell days, even weeks, faster than larger units. +This means they are much better for our cash flow. +However, he admits that the new regulations give clearer guidance to developers, who previously had obstacles put in their way if they provided for too many small units in a project. +Too stressed +Singapore is a city State. +Although these new rules may be a step towards increasing the national birth rate, when talking to Singaporeans working in the central financial district, it seems they will not have much impact. +"People are very stressed, houses are expensive and so is education, so a lot of people are putting off having a family" says a young executive. +Other people can have children. +"But, for me, it is important to have my own money and time" says another young man of around 20 years old. +Men and women alike mention their careers, stress and the cost of property and education as the reasons preventing them from having children. +So, much as the Government is trying to encourage its citizens to have children, when it comes to babies, the Singaporeans have the last word. +What is private offline is private online +Privacy. +According to the Spanish Royal Academy Dictionary, it means the quality of private life or "the level of privacy which a person is entitled to protect from any interference." +What is privacy for an under 16? +How do you apply this definition to their daily life and social networks? +Do they understand the dangers they are exposed to by airing information over the Internet which they probably would not share offline? +ElPeriódico interviewed five children aged between ten and 15 years old who are frequent Internet users. +In four cases, they associated the term with "something very much mine" on a personal level, and "in the user name and password" when applied to social networks. +"I wouldn't upload my deepest secrets in a post" says Jorge, aged ten, when trying to explain the meaning of privacy on sites such as Facebook, Twitter, Hotmail and Windows Live Messenger, with which he has had accounts for two years. +"They are very secret secrets, I'll tell my mother, but not everybody" he says. +On FB I upload nice pictures or games. +And I have fun with people I know. 
+"I wouldn't share a photo that isn't mine, or that belongs to somebody who's doing something stupid" he says. +The child recognises that it is bad to post obscene pictures of naked people, crimes, or write humiliating or aggressive comments. +Jorge says he knows the 35 friends he has on FB and his nine followers on Twitter. +Most are relatives. +His mother is included, and she has the password to one of the accounts. +I opened Twitter to express myself and post interesting tweets. +"I don't know if they answer me, I only upload them" he adds. +"Social networking is fun, I can talk quickly to relatives far away or my friends" he says. +He does not hesitate to reply that he would never accept a request from an unknown person. +Nor would he take any notice of someone who recommends a stranger to him. +The case of Joseph, aged 14, is different. +This teenager has accounts with Hotmail, Facebook, My Space and Ask, and in the last case he admits not knowing 20 of the people added to his friends list. +"It doesn't bother me, because we have something in common, like music" he says. +The boy says that no-one has suggested anything to him or asked him for his home address or phone number. +"If they pressured me or asked me for it, I'd just delete them from my account" he states. +Joseph became a follower on Ask, after reading a recommendation on Twitter. +This teenager is not alien to experiences of what is now known as cyberbullying. +An acquaintance of a friend of mine was being pestered on a social network. +They were threatening him and demanding money from him. +"I never found out who it was" he says. +The victim, according to José, did not close his account. +"He just made it private." +He then explains a series of steps to configure the account safely. +Unlike Jorge, this boy would upload photos of acquaintances in uncomfortable or embarrassing situations. +I would do it if I didn't like somebody, or they made me want to do it. +"However, I know that's cyberbullying" he admits. +Key questions +Marielos Porras, an English teacher with a degree in Education and Learning, believes that to guide children and teenagers, they should understand that the purpose of social media is to inform. +"The Internet emerged as a means of searching for information, but with the appearance of these websites, the rules of the game changed" he says. +Porras says the scholar Marc Prensky, with a Master's degree in Education from Yale University and author of the work Digital Natives, Digital Immigrants, coined these terms to explain the phenomenon. +Digital natives are those children and young people born with technology. +"We are the digital immigrants who have to teach them, when we are still in fact learning" he says. +He says that the issue is complex, "because we are asking them to have a clear policy on what is appropriate or not to disclose, publish or divulge, at an age at which maturity is not conducive to this." +"They also have to be selective when what matters most is to be popular and have thousands of friends, without thinking of the consequences" he adds. +According to the specialist, the most effective way to teach children and teenagers what privacy is, is through questions that make them think. +"Telling them not to do it is no good" he adds. +Porras then lists some options: There are things you wouldn't tell a stranger, so why do it online? +Or, would you like a friend to publish a photo of you like the one you posted of a friend? +Do you know what others publish about you? 
+When tagging party photos, did you ask the other people's permission to tag them? +And one more question: does everyone need to know what you're doing all the time? +Another point is to make them see that they must behave online as they do offline. +The rules are the same. +"Outside the Internet, people act with respect, morality and other principles, so they should act the same way on social networks" he says. +Monitoring +Stuart Guard, a university professor, primary school teacher and educational consultant, says it is essential for parents to read social networks' policies thoroughly. +By understanding all the clauses, they have solid grounds to talk to their children about the implications of opening an online account. +"For example, the age at which you are allowed to share or publish" he says. +According to Guardia, it is important to remind children the "don't talk to strangers" lesson. +Unasur Summit closes without making public the Lima Declaration +The Sixth Presidential Summit of the South American Union of Nations (Unasur) concluded today in Peru without making public the Lima Declaration, previously announced and theoretically signed by the seven attendee leaders. +Efe repeatedly tried to gain access to the document signed at the Sixth UNASUR Meeting of Heads of State and Government, but Presidential and Chancellery sources initially said they would deliver it after the summit closed, but later they claimed that it will be published at some point on the Peruvian Government website. +When asked about the text, they pointed out that the content had been disclosed by Peruvian President, Ollanta Humala, during a brief statement to the press. +Journalists' access to information from the Summit was restricted at all times. +During the summit, in the press room, only video was aired, with no sound, showing the presidential meeting with the message "closed session, audio restricted." +The little information that circulated among reporters was given by the press spokesmen of some of the UNASUR governments attending the meeting, but not the Peruvian Government. +The only document released during the summit was the list of attending presidents, which angered hundreds of journalists from various national and international media, who asked for more details. +The Peruvian President then sent an email to the media with the "final statement" of the summit, but this was Humala's statement, and not the official document that closed the summit. +Last October, Peru hosted the Third Summit of South American-Arab Countries (ASPA), and this time, despite repeated requests from the press, the previously announced Lima Declaration was again not made public. +The ASPA official website confirms that the document was published last Tuesday. +At both international events, the Peruvian authorities were at pains to ensure that there were broadcasting systems assured for all the journalists, but limited the obtaining of information to a maximum. +The summit also concluded with the joint commitment of Chile and Peru to accept a ruling by the Hague Court to adjudicate a border dispute between the two countries. +The Presidents of Peru, Ollanta Humala, and Chile, Sebastián Piñera, met during the regional event and confirmed that they will respect the decision of the International Court of Justice (ICJ), which on Monday, at The Hague, will start to hear the arguments of both parties, in the lawsuit Lima has filed against Santiago. 
+"We will obey and execute the order that currently defines the differences we are bringing before this international court" said Humala, together with his Chilean counterpart. +"Chile has been, is and will remain a country that respects international law and the peaceful resolution of disputes, treaties and international courts" added Piñera, greeting Humala with a handshake, alongside the flags of the two countries. +Confirmation of both presidents that they would submit to the ICJ came after Colombia this week denounced the Bogotá Pact, whereby it accepted to submit to the judgement of this international court, following a decision on its maritime boundary with Nicaragua which it regarded as seriously flawed. +The summit was held with the absence of the Presidents of Brazil, Dilma Rousseff; Venezuela, Hugo Chavez; Bolivia, Evo Morales; and Argentina, Cristina Kirchner. +Paraguay, which was suspended by UNASUR in 2011 after the dismissal of former President Fernando Lugo, was not involved in the meeting. +Host President Ollanta Humala was responsible for opening the session in the morning and closing the summit, just after noon in Lima. +The President read the final document which reported that 16 agreements were adopted and the action plans laid down for 31 projects between the South American countries, for a total of 17 billion dollars of investments. +Among the resolutions adopted, it was mentioned that UNASUR countries will take "important steps toward the goal of a South American citizenship, for which residence agreements are being extended." +He reported that actions are being implemented to improve "cooperation in the fight against insecurity and transnational organised crime, actions to make medication more accessible, low-cost Internet access in all areas of South America, and to deal jointly and efficiently with risks of natural disasters." +With Europe in crisis, "economic consolidation (in Latin America) should not have a triumphalist attitude but should serve to expand its productive matrix and glimpse a better future for its people" Humala added. +"We decided to focus on a group of 31 flagship projects that will improve connection among areas of South America, especially in rural and border areas... uniting our countries and creating new economic networks" said the Peruvian President in a message read out. +Among these projects, he mentioned that five are in Peru and are located in the transverse axes of its territory, between the coast and Brazil, and two focus on increased connection with Ecuador, although he gave no further details. +Also, the final document mentioned the political situation in Paraguay. +"We hope the electoral process in that country serves to reincorporate it in the Union of South American Nations," from which it is currently excluded. +The need for Latin America to remain a prosperous, peaceful and integrated nation, with good neighbourly relations, was another issue highlighted by the summit. +In this sense, the President of Colombia, Juan Manuel Santos, said before attending the start of the regional event that he expected to meet with his counterpart from Nicaragua, Daniel Ortega, on Saturday in Mexico, to respectfully discuss the maritime dispute after the failure of the ICJ, questioned by Bogota. +"The day after tomorrow (Saturday) I might have a meeting with President Daniel Ortega" Santos said. +"We will review all these paths, [which] are not exclusive, and the treaty with Nicaragua will require a conversation with Nicaragua" he emphasised. 
+"With President Ortega, I hope I can say that we handle this in the most civilised and respectful manner possible" said Santos. +Santos and Ortega are due to meet on Saturday in Mexico, where they expect to attend the inauguration of the country's new President, Enrique Peña Nieto. +Also, as part of the summit, the bloc's foreign defence ministers met in advance to approve the 2013 Action Plan, which seeks to strengthen dialogue and consensus on defence in the region. +Argentina, Bolivia, Colombia, Ecuador, Peru, Brazil, Uruguay, Venezuela, Chile, Guyana, Surinam and Paraguay make up UNASUR, although the latter is currently suspended. +Peru has the pro tempore presidency of the regional bloc. +"South America should learn from Europe to integrate citizenship" says Rafael Correa +The President of Ecuador, Rafael Correa, said today that the creation of a common citizenship is a goal that "South America, in this case, must learn from Europe." +Correa, who took part in the Eleventh Presidential Summit of the Union of South American Nations (UNASUR) held in Lima, told Peru's state television that Europeans "killed one another in the Second World War" and other conflicts, "but are now practically one country." +To this end, he defended the project to establish South American citizenship encouraged by member countries of UNASUR. +"We have to achieve the free movement of citizens and workers for any South American country, as is already the situation with members of the Andean Community. However, there are still reactionary sectors that want us to return to the past" he said. +The Ecuadorian President was also in favour of the restructuring of the Organisation of American States (OAS) under the premise of reducing the influence of the Anglo-Saxon states and taking into account those who have signed the Pact of San José on human rights. +Those who speak with authority never commit to anything, whereas we South Americans sign everything. +"It is incomprehensible that the Inter-American Commission on Human Rights is in Washington under US funding" he said referring to Ecuador giving political asylum to WikiLeaks founder Julian Assange. +Correa said he does not regret that decision because with it he has not betrayed his principles, but has respected his "deep democratic and human rights values." +He added that, at the time, "he had reasonable suspicion that Assange would be extradited to another country and that his case would not be respected." +Additionally, he criticised the Swedish courts for demanding that he be subject to questioning for an alleged sexual offence in his country, when "Swedish legislation itself dictates that he can be questioned via videoconference, which could be done from the Ecuadorian Embassy in London." +Correa said that there is a risk of deterioration of Assange's physical and mental health. +"I have not spoken to him since he was at our embassy, but the ambassador informed me that he had a minor lung problem, nothing serious" said the Ecuadorian President. +What there is, is the danger that his physical and mental health may deteriorate due to being locked in a small space without any outdoor exercise. +"That would complicate the health of any person" he added. 
+Correa said that the solution to the asylum granted to Assange in June by the Ecuadorian Embassy, in London, through the issue of a safe-conduct pass that permits travel to Ecuador, is in the hands of Great Britain, Sweden and the European legal authorities, and stressed that there have been talks with London to seek a solution to the imprisonment of the WikiLeaks founder. +We do not negotiate with human rights, we do not use that word in this case, but there have been ongoing discussions. +"The solution to this problem is in the hands of Great Britain, Sweden and the European legal authorities, because Assange's lawyer, Baltazar Garzon, is handling a series of cases in different European courts" he said. +And he felt that "if Britain says no to the safe-conduct pass, it's over." +And if Sweden, as its legislation perfectly well allows it to do, and as it has done in other cases, questions Mr Assange at the Embassy of Ecuador in London, or interrogates him via Skype tomorrow, this problem is over. +Correa took the opportunity to reassert himself as a defender of freedom of the press and stated that what he does not tolerate is "the mediocrity, dishonesty and lies that undermine the freedom of expression." +"The greatest enemies of the press freedom are not evil and wicked politicians, but bad journalists depending on profit, blackmail and extortion" he said. +In that regard, he welcomed the fact that it was no longer these journalists, "or the bankers or bourgeois and hegemonic countries that dominate Ecuador" and said that, if re-elected, he will "step up the revolution to continue on the same path and in the right direction." +Correa also supported the decision to maintain the veto on Paraguay in UNASUR, at least until their next elections, arguing that the body "must be firm and not tolerate opportunism and a coup masked with legality" because this will in fact "destroy the legitimacy of Paraguayan democracy." +The Ecuadorian President also considered the "perfectly pertinent" desire of his Colombian counterpart, Juan Manuel Santos, to now negotiate with Nicaragua the maritime boundary between the two countries, after the ruling of the International Court of Justice in The Hague, in favour Nicaraguan maritime sovereignty. +For now that ruling is not being followed. +It is a problem between a South American country and a Central American one. +Conflict is inevitable, but must be overcome by the desire to walk together. +They need to be processed in a comprehensive manner to overcome them and move forward. +Additionally, he trusted in a sound conclusion to the maritime boundary dispute opposing Peru and Chile in the same court and said that "it is right for Latin America to refer to international courts if both countries agree to accept losing, however hard it may be." +With reference to the possibility of his standing as a candidate in the upcoming presidential elections in Ecuador seeking a third consecutive term, he said he sees that possibility "with much optimism and joy, although at times it is pretty hard." +Correa said that if he loses the elections in February 2013, he will retire from public life. +Personally, I've never been interested in power, but in situations as unjust as those in Ecuador, socio-economic poverty can only be corrected by political power. +"My political movement believed that it was me who ensured that probable victory, so we have to accept that responsibility" he said. +If I won, it would be my last period in office and then I would leave public life. 
+If I lose, likewise. +"It's a decision" he confirmed. +Correa also referred to Venezuelan President Hugo Chavez's new health treatment in Cuba. +I just spoke with Venezuelan Vice President Nicolás Maduro and he tells me that Chavez went for treatment that was already planned, routine treatment, and it was expected he would win the campaign and return to Cuba. +"This does not mean a health relapse for President Chavez" he said. +In Lima today, the Ecuadorian Head of State attended the Sixth Summit of Heads of State and Government of the Union of South American Nations (UNASUR), which concluded with calls for greater regional integration to sustain progress, equality and security. +Deaths caused by AIDS are nowadays due to late detection +Fabrizio was 21 years old when they confirmed his test result: HIV positive. +"It was like a bomb dropped on me" he says, recalling the time of the announcement, which the doctor was trying to make "softer," apparently unsuccessfully. +The boy hid it from his family. +He decided to care for his illness alone and began to learn about it; thanks to his efforts he has just celebrated his 43rd birthday. +He is undoubtedly one of the oldest patients in the HIV Unit of the Guadalajara Civil Hospital (CHG), where he arrived in 1994 after several battles with his health. +Fabrizio has lived with the human immunodeficiency virus (HIV) for 22 years, hard to imagine in the early '90s, when there were many questions, few treatment options and a great deal of stigma. +Then, even the director of an IMSS [Mexican Social Security Institute] clinic refused to discharge him "because he had a cut." +At that time, having Aids was synonymous with death. +Now it is possible to survive the syndrome and do so with quality of life. +However, many people are still unaware of their illness, and only seek help when the virus has already caused havoc, "exhausted" their immune systems and they are suffering from opportunistic infections. +31 years after of the onset of AIDS around the world, at least since the first reported cases, "the great achievement at this time is that the life expectancy of patients starting treatment in good time and the life expectancy of the general population is exactly equal" stated the head of the CHG HIV Unit, Jaime Andrade Villanueva, saying that this information was endorsed in April this year in a prestigious scientific journal. +Infectious disease specialist and expert in HIV/AIDS, Andrade Villanueva said that since 2008 scientists had concluded that AIDS was not a death sentence, but that life expectancy and quality of life depend on the degree of damage to the immune system that patients present when they are diagnosed, with a higher life expectancy for non-drug users: up to 30 years for patients with a 200 CD4 count and 50 years for those reporting a 500 CD4 count. +In simple terms, this means that anyone diagnosed HIV positive at 25 years old, under these terms and "as long as they keep it under control, can live with no problems to 75" said the interviewee. +To gauge this progress, it should be remembered that the average life expectancy of Mexicans today is 76 years. +Although mortality has dropped significantly in recent years and, in the case of Mexico, the number of people dying of AIDS has fallen from 6,678 in 2007 to 4,862 in 2011 (UNAIDS annual report), it is also true that since the advent of AIDS, 60 per cent of patients in the national database have died. 
+In Jalisco alone, only 255 people died in 2011, and there have been 187 deaths up to May of this year; however, we are assured that there has been universal access to antiretroviral drugs since 2005. +- Why are do still deaths occur? +- I think the problem is not to do with access to treatment. +That's how I view it, and that's how it's been at our hospital. +For at least the last 12 years we've had no shortage of medicine, the problem is that patients arrive in an advanced state of illness because they are unaware of their HIV status, that is to say, the later stages of the disease. +He gave a compelling statistic: "Nine out of ten patients arrive when they already have an opportunistic infection, so what needs to be done to have a greater impact on overall mortality is to make earlier diagnoses and, therefore, offer mass detection tests for everyone who needs them. " +Specialists and officials of the State Council of AIDS Prevention in Jalisco (COESIDA) agree on this proposal, as do the patients themselves, such as Fabrizio, who came to be tested at a private laboratory, motivated only because a friend had done so and, despite his young age, he was around in the AIDS era and had even suffered Kaposi sarcoma, a cancerous tumour that is one of the common complications. +Everything changes when you know you have AIDS. +Some people think they're going to die and don't want to know anything. +"If I'm going to die, I'd rather have a blow-out three times a week" they say, but not in my case. +The change was for the better; I eat well, I exercise, I take my drugs. +To date, his parents are only aware he had cancer. +I live as normal a life as anyone else. +"I work, I take part in a lot of activities, I travel, I have an active but responsible sex life, I take care of myself and the other person" said Fabrizio, who agreed to share his intimate secrets with MILENIO JALISCO, to motivate those people with his story who today, in the context of World AIDS Day, are afraid. +They should get tested if they are at risk. because the sooner they know if they are HIV positive, the better, and if they have already been diagnosed, they must learn to live like any other person, while being responsible. +This is his message, which summarises the theme of the fight against AIDS in 2012. +Condoms behind the counter. +The gaps between health programmes and ordinary citizens are huge, said Ricardo Salazar, a journalist from Guadalajara who has taken up the HIV cause. +And the greatest cure is prevention. +In places dedicated to this task "the distribution of condoms has actually increased; previously, they used to give us one or two, now they give us packets of a hundred, and that's fine, but it turns out there are still people out there who have no access condoms" he said. +Among the most vulnerable to new infections are teenagers. +"Why do you want them?" is a common question, asked with sarcasm and judged according to the values of social workers, counsellors, pharmacy workers and healthcare staff who do not want to expose teenagers to sex, said the speaker. +It was decided to change such inefficient allocation, and that condoms should not only be placed behind counters, but that packets of one hundred should be found in public toilet dispensers in places frequented by young people. +This is not promoting promiscuity. +It is not about paying for their beers or motel fees, as Governor Emilio Gonzalez said, when asked if there would be distribution of condoms during his administration. 
+"And it's not about sexuality, but it is best to provide condoms to those already practising sexual activity" he said. +Jalisco key points +There are 13,435 cumulative cases (12,158 AIDS and 1,317 HIV). +The state is 4th in the nation in new and cumulative cases of AIDS and 13th in HIV. +92% of new infections are through sex, 6% via the bloodstream and 2% perinatal. +An estimated 50,000 people may be living with HIV, as for each registered case there are around 4-5 people who do not know they are positive. +Ratified by a United States court of appeal, a judgement which ignores the restructuring of the Vitro Group's debt achieved via a bankruptcy in Mexico, the scenario is an ominous precedent for any national company with offices in the neighbouring country that has solvency problems. +It seems, then, that the proceedings in support of survival of firms permit Mexican law are not valid in the land of stars and stripes, contrary to international conventions. +In practical terms, the endorsement of the judgement delivered on 15 June by Judge Harlin Hale of the Bankruptcy Court of the Northern District of Texas, leaves Mexican firms defenceless against possible seizure of their property outside of Mexico. +However, the decision opens the door for the leading glass manufacturer in Mexico to appeal to the Supreme Court of the United States, claiming three inconsistencies. +From the start, while the trial judge notes that creditors should be governed by the United States Bankruptcy Code, the Court of Appeal for the Fifth Circuit, based in New Orleans, states that the main action is the insolvency action handled in Mexico. +The first point would involve ignoring international procedural cooperation in cases of insolvency of companies with transnational profiles. +Indeed, the UN Model Law for International Trade Law Uniformity was created for this purpose, with the American Law Institute positioned as arbitrator. +Secondly, the judgement establishes that without the intercompany vote, with the debts the Vitro subsidiaries had with their parent company recognised in the critical mass of the insolvency, the majority needed to approve the restructuring might not be achieved. +However, Mexican law recognises the possibility. +In fact, the Vitro case was not the first one in which the scheme was accepted. +There are half a dozen examples, including Agremex and Commercial Mexicana, whose intercompany debts were endorsed by the Federal Bankruptcy Institute. +What is also certain is that, not including the votes of subsidiaries, the Vitro creditors who fought against it in the US courts, namely "vulture" funds such as Aurelios Capital, Aurelios Convergence, Elliot International and Liverpool Limited, did not achieve a majority. +The vote was apparently 45 percent versus 37. +This data is omitted by the Court of Appeal. +From another perspective, the latter blames Vitro for the difficult situation it has faced since 2008, while trying to avoid the severe economic crisis faced by the United States, turning its back on the country. +For now, the Gonzalez Sada family firm has lodged a motion for reconsideration before the Court of Appeal for the vote to reach the plenary of the court, that is, the five judges, given that only three voted previously. +Should this fail, an appeal for review by a higher court, in this case the US Supreme Court, will be filed. 
+The real problem is that the court bypassed a document sent by the Government of Mexico in the capacity of amicus curiae ("friend of the Court"), which details the procedure followed by Vitro under the framework of the Commercial Insolvency Law, noting that the latter discharged itself with adherence to the agreements signed by the two countries to link it with Chapter 15 of the Bankruptcy Act of the United States. +Moreover, it should be noted that the country yielded to the principles of the United Nations Commission on International Trade, that is the rules set for cross-border insolvency cases, ensuring fairness for debtors and creditors. +Double whammy: Vitro hit and country hit. +Balance Sheet +With the complaints put on the table by the unions of Mexicana Airlines against the former owner of the company, Gastón Azcárraga Andrade, who is accused of mismanagement, dormant for several months, the Airline Pilots Union Association already found the bottleneck. +The proceedings headed by Carlos Diaz Chavez Morineau has just filed a criminal complaint against the National Banking and Securities Commission, which is accused of obstructing justice. +The claim is that the supervisory authority has consistently refused to provide reports to the Attorney General's Office on a transaction carried out by the employer to remove 198 million pesos from trust F/589 of Banco IXE, on behalf of Mexicana de Aviación. +The resources were apparently channelled towards the purchase of shares in the company Administradora Profesional de Hoteles. +As you know, Azcarraga Andrade is the main shareholder of the Posadas hotel chain. +Opposing Dragon Mart +A group of local and foreign environmentalists, academics, businessmen and members of the public gathered at the weekend at a forum at the University of the Caribbean to approve the creation of a broad front to oppose the opening of the Chinese Dragon Mart in Cancun. +As you know, we are talking about a huge sales and distribution centre in Mexico, Central America and the Caribbean, selling Chinese products, with a residential area at the bottom for employees of 150 companies. +Previously, Canacintra had managed to unite the governors of the southeast of Mexico to oppose the monumental building that destroyed part of a protected area and represents the mother of all threats to industry. +The death of ACTA +The Government ignored an order of the Senate to explain under what terms and conditions the Mexican Ambassador in Japan signed the Anti-Counterfeiting Trade Agreement, known by its acronym ACTA, according to the Mexican Institute of Industrial Property, and the matter has already been archived. +As you know, the action was taken even though the Senate had ruled out the possibility, deeming it an infringement of freedom of expression on social networks. +Homex long term +In effort to repay long-term debt without affecting short-term debt, the housing developer Homex is placing securities exchange certificates on the market for 500 million pesos. +The issue is the first of four identical issues which are offering to repay interest every 28 days. +Birth of Competival +A consortium under the name Competival has just been established, comprising the companies NYCE, e-Quality and Kernet, leaders in information technology, the objective of which will be to market the services of software clusters in Central and South America. +Investments in this area exceed USD 1.5 billion. 
+Hector "Hetin" Reyes: "Basketball has been my life" +Basketball globetrotter Hector "Hetin" Reyes was involved in the sport for over 60 years and, thanks to it, travelled the world. +Few people in Puerto Rico have a mental recollection of local basketball history as broad as that of Héctor "Hetin" Reyes. +Reyes was immersed in the sport for over 60 years before being confined to a wheelchair in 2008 following a stroke; he was a minor league player, National Superior Basketball player, BSN representative and manager with the Bayamón Vaqueros or President of the Basketball Federation. +"I wore lots of hats in basketball throughout my life, including several at the same time, like when I was president of the BSN, general manager and federative president of the National Team during the '90s," recalled Reyes during Primera Hora's visit to his home in Bayamón, where he lives with Isabel, his loyal wife for over 50 years. +"Basketball has been my life." +Reyes is not exaggerating when he makes that statement. +The walls of his house are almost totally decorated with pictures and memorabilia denoting his long career, which goes to prove it. +Bayamón at heart +Of them all, the ones he treasures with the most emotion are the ones that remind him of his time spent with the Vaqueros, from the mid-50s as a player until 1982, when he completed 15 years serving as co-agent or agent of the franchise. +"Those were my best years, the ones I enjoyed the most because I had the opportunity to be part of the Vaqueros' eight championships, since 1967, either as agent, co-agent or manager. +There were many good years, including the five consecutive championships from 1971 to 1975. +And then I said goodbye with one in 1981, Jerome Mincy's debut year in the BSN. +Then "Cuco" Ortiz took over - he was a great manager" said Reyes. +I remember that Gene Bartow, who had directed here and was at the University of Alabama (Birmingham), said to me 'I've got a very strong player for you, 6'7" tall. +Do you want him?' +And that was the beginning of Mincy, one of the best players Puerto Rico ever had. +Bartow then recommended the sharpshooter Gausse Raymond, who established residency here and was one of our best shooters. +I remember him saying that if Mincy had given Bayamon one championship, Gausse would help get another. +The Vaqueros' championship with Gausse was enjoyed, but from a distance, because in 1988 he was already becoming a federative bigshot. +For that time, he preferred to enjoy his own and Mincy's accomplishments in the national team. +I remember when we beat the United States for the first time during the 1989 Pre-Olympics in Mexico. +Then came the 1990 World Cup, where we came fourth and it should have been bronze, but for the Canadian referee who made us repeat the final play for the second time, said Reyes. +Is the 1990 World National Team the best you've ever seen? +It's one of the best, as good as the one that beat the Dream Team in the 2004 Olympics. +However, my favourite was the one in the 1991 Pan American Games in Cuba, when we won gold and gave the US team a beating, which was quite similar to the time we won bronze at the World Cup. +That team not only again included Mincy, Gausse, Ramon Rivas, Fico López and 'Piculín' (Ortiz), but also the young (Javier) 'Toñito' Colón and James Carter, the Leon brothers (Francisco and Edgar) and Mario 'Quijote' Morales, who was kept out of the 90 team by a knee injury. 
+A team that maybe was not the best in terms of members, but which gave us a gold medal and was a great joy to work with, was the 1995 Pre-Olympic team in Neuquen, Argentina. +With role players such as 'Canito' Nieves, Pablo Alicea and the young Rolando Hourruitiner replacing the players suspended after the shambles of the Mar del Plata Pan-American Games, we won gold against all the odds. +Who was the best Puerto Rican player? +Without any doubt, Piculín Ortiz. +His numbers at international tournament level are awesome. +Nobody in Puerto Rico has dominated at that level like Piculín did. +Not to mention his career in the various leagues he played in. +Who was the best Puerto Rican manager? +That's a difficult one. +We had a very good team, including Julio Toro, Flor Melendez, Carlos Morales, Raymond Dalmau, Armandito Torres. +Of the youngsters, I really like the work of Leo Arill. +What do you consider your greatest achievement in the federation? +Having been part of the National Team's most glorious era between 1988 and 1995 and in the early 90s the BSN had up to 17 teams in a season. +What was there left for you to do? +There were things I'd have liked to implement, such as regionalising the minor leagues. +For example, the boys of Ponce only play in their area and only get to face teams from other parts of the island in the national playoffs. +Right now the kids are riding and playing too much, unnecessarily. +At least I see the fruit of compulsory certifications and a course for leaders, table officials and referees. +That pleases me. +What are you doing now? +The most I do is listen to music, watch music videos from my era on YouTube, enjoy my grandchildren and occasionally go to basketball games. +And of course, enjoy the company of my wife, Elizabeth, who has always been with me. +Actor Larry Hagman dies +Larry Hagman, born on 21 September 1931 in Fort Worth (Texas), became world famous for his role as John Ross Ewing, better known as "JR," in the television series "Dallas," in which he played a ruthless, malicious and manipulative businessman. +Larry Hagman, whose role as oil tycoon predator JR Ewing in the television series "Dallas" became a symbol of greed in the 1980s, has died. +He was 81. +Hagman, who returned this year as JR in a new season of "Dallas," died on Friday afternoon of cancer complications, according to a family statement provided to the Associated Press by the Warner Bros., producer of "Dallas." +"Larry was back in his beloved hometown of Dallas, once again representing the iconic role he most liked" the family said. +Larry's family and closest friends were with him in Dallas for the Thanksgiving Day holiday. +Linda Gray, who played his wife in the original series and the sequel, was with Hagman when he died in a hospital in Dallas, said her publicist, Jeffrey Lane. +He brought joy to all who knew him. +He was creative, generous, funny, loving and talented, and I will miss him dearly. +"He was an original guy and lived life to the full" said Gray in a statement. +Hagman was diagnosed with cirrhosis of the liver in 1992 and admitted that he had drunk a lot over the years. +In 1995 a malignant tumour as found in his liver and he underwent a transplant. +Years before "Dallas," Hagman became famous on television as a decent guy in the light comedy "I Dream of Jeannie," aired on NBC from 1965 to 1970. +He played Captain Tony Nelson, an astronaut whose life is changed when he meets an attractive genie, played by Barbara Eden, and takes her home to live with him. 
+He also starred in two sitcoms that were not aired for long, "The Good Life" (NBC, 1971-72) and "Here We Go Again" (ABC, 1973). +His film work included roles well received by critics in "The Group," "Harry and Tonto" and "Primary Colors." +But it was his masterful interpretation of delightfully detestable JR that led to Hagman reaching his peak of stardom. +The drama series on CBS about the Ewing clan and other characters in their orbit aired from April 1978 to May 1991. +The tagline "Who shot JR?," designed to generate hype around an episode full of emotions in which Hagman's character is nearly killed, generated international speculation and millions of risky dollars wagered in gaming establishments. +It also helped give the series a record audience at the time. +When the answer was revealed in an episode in November 1980, an average of 41 million viewers tuned in and made "Dallas" the second most watched entertainment programme in history, after the final episode of "MASH" in 1983, which had 50 million viewers. +It was JR's sister-in-law Kristin (played by Mary Crosby) who shot him. +JR got her pregnant then threatened to say she was a prostitute unless she left town, but there were others who also had reasons to attack him. +Hagman portrayed Ewing as a corrupt insatiable man with a charismatic smile: a dishonest entrepreneur and cheating husband who tried to have his alcoholic wife, Sue Ellen (Linda Gray), sectioned. +"I know what I want on JR's tombstone" Hagman said in 1988. +It should read: "Here lies the honest citizen JR Ewing." +This is the only deal he lost. +Victoria Principal, co-star of the original series, recalled Hagman on Friday as someone "huge, on and off screen." +He is unforgettable and irreplaceable, for millions of fans around the world, and in the hearts of each one of us who was fortunate enough to know and love him. +Ten episodes of the new edition of "Dallas" were broadcast a few months ago with great success for TNT. +He had already finished recording five episodes for the second series and a sixth was in process, the chain reported. +Immediately after, there was no statement from Warner or TNT about how the series would handle the loss of Hagman. +Hagman, born in Fort Worth, Texas, was the son of actress and singer Mary Martin, who starred in classics such as "South Pacific" and "Peter Pan." +Martin was still a teenager when she had him in 1931 during her marriage to lawyer Ben Hagman. +He tried his luck in the New York theatre scene in the early '50s, and later served in the Air Force from 1952 to 1956, in England. +While there, he met the young Swedish designer Maj Axelsson and married her. +The couple had two sons, Preston and Heidi, and lived for a long time in the Californian city Malibu, home to many celebrities. +In 2001, he called his memoirs "Hello Darlin': Tall (and Absolutely True) Tales About My Life." +"I didn't put anything in it that I believed would hurt anyone or affect them in any way" he told Associated Press at the time. +After his liver transplant, he became an organ donation promoter and worked as a volunteer at a hospital, helping fearful patients. +"I advise them, encourage them, meet with them when they come for their surgery, and afterwards" he said in 1996. +I try to offer some comfort, such as "Don't be afraid, it will be a little uncomfortable for a short time, but then you'll be fine." +He was also an anti-smoking activist and took part in several campaigns. 
+Start of a course that explores the "End of the World" +Each week, students explore apocalyptic themes such as nuclear war, zombies, viruses and germs, and global warming. +This term, when Professor of religion, Stuart Charmé, decided to give a course on the end of the world, he knew he had a compelling hook: The end of the "long countdown" of the Mayan calendar, 21 December, which had convinced many people that the end of the world was coming. +But Charmé had no idea what awaited him over the next couple of months: The cataclysmic hurricane Sandy, a fiscal precipice some called "debt Armageddon" and a growing conflict involving Israel, where end-of-the-world Christians theorists think the Apocalypse will begin. +"I didn't realise this was going to be the most apocalyptic term ever" said Charmé this week to students at Rutgers-Camden University (New Jersey). +If you look at what has been happening in the world today as if we were at 30 days and counting, this has been a really good period. +And remember that bad is good for those with an apocalyptic mentality. +And he is not the only professor who offers courses on the "end of the world" this term, theoretically the last in history. +At Temple, Associate Professor Barry Vacker is giving the course "Media, Culture and the end of the world." +Each week, students explore apocalyptic themes such as nuclear war, zombies, viruses and germs, and global warming. +"We looked at why these ideas proliferate over time" he said, and how they offer hypothetical scenarios that guide human behaviour. +If nuclear material falls into the hands of terrorists, for example, a war could break out. +This month students analysed movies with an apocalyptic theme and explored how they compare with real-life examples. +"I've tried to inform students about what is possible, probable, credible and impossible" said Vacker. +At the main Pennsylvania State University campus, Latin American History Professor Matthew Restall, and his colleague Amara Solari, an Associate Art History and Anthropology Professor, have teamed up to give a course, called simply "The end of the world." +"We don't add '2012' so we always have the option of running the course again, if the world doesn't come to an end" said Restall. +Despite the "impending doom," students have to study, undertake projects and take final exams. +At Penn State, the final exam will be taken on the eve of the Apocalypse, which leaves students no choice but to work "until the very night the world is supposed to end" said Restall. +The courses proved quite popular. +"It was fully booked within two hours" said Restall, on his course for students with high averages, which was filled with 35 students. +We received emails for weeks and weeks before the start of the term, from people asking if there were any places. +Students, meanwhile, say the course is one of the most interesting around. +"I find it fascinating to see what people do to console themselves" said Bridgid Robinson, a 23-year-old post-graduate Religion and Sociology student from Haddonfield, New Jersey, at Rutgers-Camden. +And the apocalyptic, secular or religious mentality is just a matter consolation or a lack of it. +Will Wekesa, a 25-year-old post-graduate Psychology and Nursing student, said he had seen all the apocalyptic movies. +"I'd never heard of a class that could teach it" he said. +I enjoy it. +But none of the students interviewed - much less any professor - said they believed in the end date of December 21st. 
+"Our first project was about the Mayan prophecy and to a certain extent we discredited it" said Julie Zeglen, a 21-year-old final year student at Temple, from West Chester. +The Mayans never predicted the end of the world: it is just a key point in the calendar, said Restall. +But he said that Western culture suffers from apocalyptic anxiety, which goes back several centuries, in which people react to changes around them by predicting the end of the world. +The Internet has caused a boom in these speculations. +"In other places, people don't think about it" he said. +It's mostly in the English-speaking world. +Joseph Dougherty, a Professor of religion at La Salle University, who is giving courses in the Philippines this year, responded quickly to the question of whether he knew about any courses on the "end of the world" there. +"The Philippines are not taking part in the end of the world" he wrote, suggesting an exception of a higher authority. +We have an indulgence from the Pope. +Restall noted that over the years there has been talk of many days of the last judgement, and said that if nothing happens on December 21st, "people will immediately start thinking of the next date" or philosophising that December 21st is the beginning of a seven-year period after which the world will end. +Students and teachers are taking the date lightly. +Some said they plan to go to "end of the world" parties. +"Maybe I'll call some friends so we can have a laugh together" said Samira Ford, 20-year-old communications student. diff --git a/tests/data/pred_real/valid.pred b/tests/data/pred_real/valid.pred new file mode 100644 index 000000000000..3c51623f0fd8 --- /dev/null +++ b/tests/data/pred_real/valid.pred @@ -0,0 +1,100 @@ +A Republican strategy to counter the re-election Obama +Republican leaders justified their policy the need to combat electoral fraud. +However, the Centre considers this a myth, stating that electoral fraud is rarer in the States than the number of people killed by +Indeed, Republican lawyers identified only 300 cases of electoral fraud in the United States in a decade. +One thing certain: these new provisions will have a negative impact on voter turn-out. +In this sense, the measures will partially undermine the American democratic system. +Unlike in Canada, the American States are responsible for the organisation of elections in the United States. +It is in this spirit that a of American governments have new laws since 2009 making the registration or voting more +This phenomenon gained momentum following the November 2010 elections, which saw 675 new Republican representatives added 26 States. +As a result, 180 bills restricting the exercise of the right to vote in 41 States were introduced in alone. +The new election laws require voters to show a photo ID card and proof of citizenship. +Furthermore, laws also reduce early voting periods, invalidate the right to register as a voter on election day and withdraw the right to vote of citizens with a criminal record. +Before the elections, no US State required voters to show a photo ID card. +Indiana was the first State to impose such a requirement. +In 2008, the Supreme Court of the United States the constitutionality of the Indiana law. +The Republican authorities were quick to extend this practice to other States. +Over the past two years, they sponsored bills in States to force voters to show a photo ID card. +It is to note that, unlike Quebec, American citizens do not have a universal ID card such as the health insurance card. 
+In fact, 11% of American citizens, i.e. 21 million people of voting age, do possess a photo ID card issued by a government agency of their +In addition, five million new voters in do not have such +And it often costs over a hundred dollars to obtain the required identity card. +The new restrictions disproportionately affect young people, minorities and people with low incomes. +In fact, 25% of Americans, of those earning less than $35,000; 18% of citizens over 65 and 20% of voters 18 to 29 years do not the required photo ID card. +And that's not all. +Students, voters considered to be voting more Democratic candidates, are not allowed in several States to use the photo ID card issued by their institution. +On the other hand, these same allow fishing or hunting club members, who vote more Republican, to use the cards issued by these clubs when they vote. +Prior to no State required proof of citizenship to vote. +Arizona was the first to introduce such a requirement. +Since 2011, a dozen States have adopted laws requiring voters to prove they are citizens. +These measures are intended to limit the Hispanic vote. +However, it appears that two out of three Hispanic voters favour the Democratic party. +What is more, in 2011 Republican legislators sponsored laws abolishing the registration of on in eight States. +In addition, they limited the right of individuals and groups to provide assistance to voters wishing to register. +These restrictions are not without consequence. +For example, the 2004 general election, voter campaigns contributed to registering around 10 million +However, the measures adopted since 2009 have led a 17% drop in the registration rate of new voters 2010 compared to 2006. +In addition, Republican legislators have enacted laws in five other States at reducing early voting period. +For example, during the 2008 general in Florida, 33% of early voters were African-Americans, who accounted however for only 13% voters in the State. +The same applied to Hispanics. +These represented only 11% of voters, but 24% of citizens who voted early. +On the other hand, 76% of voters were white but these represented only 46% of early voters. +Of course, Democratic legislators and their supporters vigorously opposed the adoption of laws restricting voter registration. +Several bills were blocked by vetoes of Democratic governors. +The United States Attorney General intervened to suspend the most controversial laws. +They were able to partially limit the damage. +For example, only 16 out of 34 States have adopted laws requiring the presentation of a ID card. +However, the new rules put in place will make it more difficult to exercise the right to vote in 2012. +Democratic critics denounce the partisan character of the laws that have been passed and they see a clear objective of influencing the results in key States. +A 2011 Brennan Centre report shows that the States that have adopted these laws represent 171 of the 270 needed in the electoral to win the Presidency. +It is too early to say with certainty that these changes in the electoral system will have significant impacts on the the 2012 presidential elections. +But thing is certain: these new provisions will have a negative the turn-out. +In this sense, the measures will partially undermine the American democratic system. +Prostate cancer screening: take the test or not? +Indeed, the PSA test sometimes shows erroneous results false negative or even false positive results, which involve unnecessary medical interventions. 
+Enough to make reluctant men to take screening tests. +Take the test or not? +We asked two specialists for their opinion. +In studies conducted in the United States, there was a lot of contamination between control groups, so it is to interpret the data and make firm recommendations. +Another study, this time a European one, concluded that there a in mortality between patients who screened and those who were not. +This study also showed, with follow-up after 12 years, that it is 30 and 40% more likely for metastases to occur in absence of screening. +I therefore recommend the test from age 50, or 40 if you have a direct relative who previously had prostate cancer. +African-American men are also more at risk. +The key is to the right decision once cancer has been detected. +are aggressive cancers and others that are indolent. +The patient really needs to be made to understand the degree of risk of his cancer, by offering him the available, not treating prostate cancers that are not long-term life threatening, and opting instead, in such cases, for active monitoring of the disease. +Today, many men whom cancer has been detected will not be treated because their cancer is not aggressive and is not life threatening. +Active monitoring will be suggested, and if the disease progresses, they will be offered treatment. +More and more, specific criteria are being determined in order to decide who should or should not treated. +Therefore I recommend taking the test. +But the important is to have a discussion with your doctor to determine whether or not to take it. +In collaboration with the Société internationale d'urologie [SIU], Movember has created a tool that makes it possible evaluate the pros and cons of the PSA test. +You can download the document (in for time being, a [French] translation will be available at this address: http://ca.movember.com/fr/mens-health/prostate-cancer-screening +Preventing the disease +Unfortunately, there is no miracle for preventing cancer. +Despite the progress in research, the adoption of healthy living habits remains the best way to the risk of suffering from it. +It estimated that if everyone ate well and exercised enough, 30% of cancers could be prevented. +"If no more people smoked, this rate increase to at least 50%," says André Beaulieu, spokesman for the Canadian Cancer Society. +On the other hand, it is estimated that roughly 10% of cancers are hereditary. +Some are also completely unexplained. +For Canadian Cancer Society, the fight against tobacco remains a despite the decrease in the number of smokers. +Cigarettes are linked to 85% of lung cancer cases. +It is also a risk factor for a number of +This massively damages people's health. +"Even today, there are 1.5 million Quebec" deplores spokesperson André Beaulieu. +Encouraging data: 10 years after giving up smoking, the risk of dying from cancer drops by half. +Weight +Overweight and obesity are also conducive to the onset of the disease, according to the SCC. +They can increase the risks cancer of the breast, colon and rectum, oesophagus, pancreas and uterus. +"Research shows that the regular practice of physical activity throughout your life protects against colon cancer" it is also said. +Diet +The organisation also recommends limiting your consumption of red meat. +In large amounts, it increases the risks of developing colo-rectal cancer. +Likewise, so do cured meat products, and these should be avoided. +The conservation meat by drying or can cause the formation of carcinogens. 
+"They can damage cells the body and lead to the development of cancer" it explained. +Vitamins +In recent a number of scientists have studied the links between vitamin supplements and cancer. +For the time being however their research is inconclusive. +Studies on vitamin E are contradictory, according to the SCC. +While one noted a decrease in the risk of prostate cancer, another noted an increase. diff --git a/tests/data/pred_real/valid.real b/tests/data/pred_real/valid.real new file mode 100644 index 000000000000..327d64c29c6d --- /dev/null +++ b/tests/data/pred_real/valid.real @@ -0,0 +1,100 @@ +A Republican strategy to counter the re-election of Obama +Republican leaders justified their policy by the need to combat electoral fraud. +However, the Brennan Centre considers this a myth, stating that electoral fraud is rarer in the United States than the number of people killed by lightning. +Indeed, Republican lawyers identified only 300 cases of electoral fraud in the United States in a decade. +One thing is certain: these new provisions will have a negative impact on voter turn-out. +In this sense, the measures will partially undermine the American democratic system. +Unlike in Canada, the American States are responsible for the organisation of federal elections in the United States. +It is in this spirit that a majority of American governments have passed new laws since 2009 making the registration or voting process more difficult. +This phenomenon gained momentum following the November 2010 elections, which saw 675 new Republican representatives added in 26 States. +As a result, 180 bills restricting the exercise of the right to vote in 41 States were introduced in 2011 alone. +The new election laws require voters to show a photo ID card and proof of US citizenship. +Furthermore, these laws also reduce early voting periods, invalidate the right to register as a voter on election day and withdraw the right to vote of citizens with a criminal record. +Before the 2006 elections, no US State required voters to show a photo ID card. +Indiana was the first State to impose such a requirement. +In 2008, the Supreme Court of the United States upheld the constitutionality of the Indiana law. +The Republican authorities were quick to extend this practice to other States. +Over the past two years, they sponsored bills in 34 States to force voters to show a photo ID card. +It is important to note that, unlike Quebec, American citizens do not have a universal ID card such as the health insurance card. +In fact, 11% of American citizens, i.e. 21 million people of voting age, do not possess a photo ID card issued by a government agency of their State. +In addition, five million new voters in 2012 do not have such identification. +And it often costs over a hundred dollars to obtain the required identity card. +The new restrictions disproportionately affect young people, minorities and people with low incomes. +In fact, 25% of African Americans, 15% of those earning less than $35,000; 18% of citizens over 65 and 20% of voters 18 to 29 years old do not have the required photo ID card. +And that's not all. +Students, voters considered to be voting more for Democratic candidates, are not allowed in several States to use the photo ID card issued by their institution. +On the other hand, these same States allow fishing or hunting club members, who vote more Republican, to use the cards issued by these clubs when they vote. +Prior to 2004, no State required proof of citizenship to vote. 
+Arizona was the first to introduce such a requirement. +Since 2011, a dozen States have adopted laws requiring voters to prove they are American citizens. +These measures are clearly intended to limit the Hispanic vote. +However, it appears that two out of three Hispanic voters favour the Democratic party. +What is more, in 2011 Republican legislators sponsored laws abolishing the registration of voters on election day in eight States. +In addition, they limited the right of individuals and groups to provide assistance to voters wishing to register. +These restrictions are not without consequence. +For example, during the 2004 general election, voter registration campaigns contributed to registering around 10 million citizens. +However, the measures adopted since 2009 have led to a 17% drop in the registration rate of new voters in 2010 compared to 2006. +In addition, Republican legislators have enacted laws in five other States aimed at reducing the early voting period. +For example, during the 2008 general election in Florida, 33% of early voters were African-Americans, who accounted however for only 13% of voters in the State. +The same applied to Hispanics. +These represented only 11% of voters, but 24% of citizens who voted early. +On the other hand, 76% of voters were white but these represented only 46% of early voters. +Of course, Democratic legislators and their supporters vigorously opposed the adoption of laws restricting voter registration. +Several bills were blocked by vetoes of Democratic governors. +The United States Attorney General intervened to suspend the most controversial laws. +They were able to partially limit the damage. +For example, only 16 out of 34 States have adopted laws requiring the presentation of a photo ID card. +However, the new rules put in place will undoubtedly make it more difficult to exercise the right to vote in 2012. +Democratic critics denounce the partisan character of the laws that have been passed and they see a clear objective of influencing the 2012 results in key States. +A 2011 Brennan Centre report shows that the States that have adopted these laws represent 171 of the 270 votes needed in the electoral college to win the Presidency. +It is too early to say with certainty that these legislative changes in the electoral system will have significant impacts on the outcome of the 2012 presidential elections. +But one thing is certain: these new provisions will have a negative impact on the turn-out. +In this sense, the measures will partially undermine the American democratic system. +Prostate cancer screening: take the test or not? +Indeed, the PSA test sometimes shows erroneous results with false negative or even false positive results, which involve unnecessary medical interventions. +Enough to make already reluctant men hesitate to take screening tests. +Take the test or not? +We asked two specialists for their opinion. +In studies conducted in the United States, there was a lot of contamination between control groups, so it is difficult to interpret the data and make firm recommendations. +Another study, this time a European one, concluded that there was a difference in mortality between patients who were screened and those who were not. +This study also showed, with a follow-up after 12 years, that it is between 30 and 40% more likely for metastases to occur in the absence of screening. +I therefore recommend the test from age 50, or 40 if you have a direct relative who previously had prostate cancer. 
+African-American men are also more at risk. +The key is to make the right decision once cancer has been detected. +There are aggressive cancers and others that are indolent. +The patient really needs to be made to understand the degree of risk of his cancer, by offering him the options available, not necessarily treating prostate cancers that are not long-term life threatening, and opting instead, in such cases, for active monitoring of the disease. +Today, many men in whom cancer has been detected will not be treated because their cancer is not aggressive and is not life threatening. +Active monitoring will be suggested, and if the disease progresses, they will be offered treatment. +More and more, specific criteria are being determined in order to decide who should or should not be treated. +Therefore I recommend taking the test. +But the important thing is to have a discussion with your doctor to determine whether or not to take it. +In collaboration with the Société internationale d'urologie [SIU], Movember has created a tool that makes it possible to evaluate the pros and cons of the PSA test. +You can download the document (in English for the time being, a [French] translation will be available shortly) at this address: http://ca.movember.com/fr/mens-health/prostate-cancer-screening +Preventing the disease +Unfortunately, there is no miracle recipe for preventing cancer. +Despite the progress in research, the adoption of healthy living habits remains the best way to reduce the risk of suffering from it. +It is estimated that if everyone ate well and exercised enough, 30% of cancers could be prevented. +"If no more people smoked, this rate would increase to at least 50%," says André Beaulieu, spokesman for the Canadian Cancer Society. +On the other hand, it is estimated that roughly 10% of cancers are hereditary. +Some are also completely unexplained. +For the Canadian Cancer Society, the fight against tobacco remains a priority, despite the decrease in the number of smokers. +Cigarettes are linked to 85% of lung cancer cases. +It is also a risk factor for a number of others. +This massively damages people's health. +"Even today, there are 1.5 million smokers in Quebec" deplores spokesperson André Beaulieu. +Encouraging data: 10 years after giving up smoking, the risk of dying from cancer drops by half. +Weight +Overweight and obesity are also conducive to the onset of the disease, according to the SCC. +They can increase the risks of cancer of the breast, colon and rectum, oesophagus, pancreas and uterus. +"Research shows that the regular practice of physical activity throughout your life protects against colon cancer" it is also said. +Diet +The organisation also recommends limiting your consumption of red meat. +In large amounts, it increases the risks of developing colo-rectal cancer. +Likewise, so do cured meat products, and these should be avoided. +The conservation of meat by smoking, drying or curing can cause the formation of carcinogens. +"They can damage cells in the body and lead to the development of cancer" it is explained. +Vitamins +In recent years, a number of scientists have studied the links between vitamin supplements and cancer. +For the time being however their research is inconclusive. +Studies on vitamin E are contradictory, according to the SCC. +While one study noted a decrease in the risk of prostate cancer, another noted an increase. 
diff --git a/tests/data/quartznet_speech_recognition.yaml b/tests/data/quartznet_speech_recognition.yaml new file mode 100644 index 000000000000..fde97d3b83df --- /dev/null +++ b/tests/data/quartznet_speech_recognition.yaml @@ -0,0 +1,101 @@ +model: "QuartzNet" +sample_rate: 16000 +dropout: &drop 0.0 +rep: &rep 1 +n_mels: &n_mels 64 +se: &se true +kernel_size_factor: &kfactor 2.0 + +AudioToTextDataLayer: + train: + shuffle: true + eval: + shuffle: false + max_duration: null + +AudioToMelSpectrogramPreprocessor: + normalize: "per_feature" + window_size: 0.02 + window_stride: 0.01 + window: "hann" + features: 64 + n_fft: 512 + frame_splicing: 1 + dither: 0.00001 + stft_conv: true + +SpectrogramAugmentation: + rect_masks: 5 + rect_time: 120 + rect_freq: 50 + +JasperEncoder: + feat_in: *n_mels + activation: "relu" + conv_mask: true + + jasper: + - filters: 32 + repeat: 1 + kernel: [11] + stride: [1] + dilation: [1] + dropout: *drop + residual: false + separable: true + se: *se + kernel_size_factor: *kfactor + + - filters: 32 + repeat: *rep + kernel: [11] + stride: [1] + dilation: [1] + dropout: *drop + residual: true + separable: true + se: *se + kernel_size_factor: *kfactor + + - filters: 32 + repeat: *rep + kernel: [13] + stride: [1] + dilation: [1] + dropout: *drop + residual: true + separable: true + se: *se + kernel_size_factor: *kfactor + + - filters: 32 + repeat: *rep + kernel: [17] + stride: [1] + dilation: [1] + dropout: *drop + residual: true + separable: true + se: *se + kernel_size_factor: *kfactor + + - filters: 32 + repeat: 1 + kernel: [29] + stride: [1] + dilation: [2] + dropout: *drop + residual: false + separable: true + se: *se + kernel_size_factor: *kfactor + + - filters: 32 + repeat: 1 + kernel: [1] + stride: [1] + dilation: [1] + dropout: *drop + residual: false + +labels: ["dog", "cat"] diff --git a/tests/data/quartznet_spkr_test.yaml b/tests/data/quartznet_spkr_test.yaml new file mode 100644 index 000000000000..d0ec8b33d7e2 --- /dev/null +++ b/tests/data/quartznet_spkr_test.yaml @@ -0,0 +1,81 @@ +model: "GramVoxNet" +sample_rate: &sample_rate 16000 +dropout: &drop 0.5 +repeat: &rep 1 +time_length: 8 +n_filters: &n_filters 512 + +AudioToSpeechLabelDataLayer: + sample_rate: *sample_rate + train: + min_duration: 0.1 + shuffle: true + eval: + min_duration: 0.01 + shuffle: false + +AudioToMelSpectrogramPreprocessor: + normalize: "per_feature" + window_size: 0.02 + window_stride: 0.01 + window: "hann" + features: &n_mels 64 + n_fft: 512 + frame_splicing: 1 + dither: 0.00001 + stft_conv: false + +JasperEncoder: + feat_in: *n_mels + activation: "relu" + + jasper: + - filters: *n_filters + repeat: 1 + kernel: [3] + stride: [1] + dilation: [1] + dropout: *drop + residual: true + seperable: true + + - filters: *n_filters + repeat: *rep + kernel: [5] + stride: [1] + dilation: [1] + dropout: *drop + residual: true + seperable: true + + - filters: *n_filters + repeat: *rep + kernel: [7] + stride: [1] + dilation: [1] + dropout: *drop + residual: true + seperable: true + + - filters: *n_filters + repeat: *rep + kernel: [9] + stride: [1] + dilation: [1] + dropout: *drop + residual: true + seperable: true + + - filters: &enc_feat_out 1500 + repeat: 1 + kernel: [1] + stride: [1] + dilation: [1] + dropout: 0.0 + residual: false + seperable: true + +JasperDecoderForSpkrClass: + feat_in: *enc_feat_out + pool_mode: 'xvector' + emb_sizes: 128,128 diff --git a/tests/data/speech_commands.tar.xz b/tests/data/speech_commands.tar.xz new file mode 100644 index 000000000000..8caa1fbd3afc Binary files 
/dev/null and b/tests/data/speech_commands.tar.xz differ diff --git a/tests/docs/test_documentation.py b/tests/docs/test_documentation.py new file mode 100644 index 000000000000..ada0bdeec639 --- /dev/null +++ b/tests/docs/test_documentation.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +from unittest import TestCase + +import pytest +from sphinx.application import Sphinx + + +class DocTest(TestCase): + source_dir = u'docs/sources/source/' + config_dir = u'docs/sources/source/' + output_dir = u'docs/sources/source/test_build' + doctree_dir = u'docs/sources/source/test_build/doctrees' + + all_files = True + + @pytest.mark.docs + def test_html_documentation(self): + """ Tests whether the HTML documentation can be build properly. """ + app = Sphinx( + self.source_dir, + self.config_dir, + self.output_dir, + self.doctree_dir, + buildername='html', + warningiserror=True, + ) + app.build(force_all=self.all_files) diff --git a/tests/integration/core/test_integration_neural_graph.py b/tests/integration/core/test_integration_neural_graph.py new file mode 100644 index 000000000000..bdbcacd64a2c --- /dev/null +++ b/tests/integration/core/test_integration_neural_graph.py @@ -0,0 +1,63 @@ +# ! /usr/bin/python +# -*- coding: utf-8 -*- +# ============================================================================= +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +import pytest + +from nemo.backends.pytorch.actions import PtActions +from nemo.backends.pytorch.tutorials import MSELoss, RealFunctionDataLayer, TaylorNet +from nemo.core import NeuralGraph + + +@pytest.mark.usefixtures("neural_factory") +class TestNeuralGraphTrainAction: + @pytest.mark.integration + def test_explicit_graph(self): + """ + Tests the integration of an `explicit` graph with actions API. + In particular, checks whether user can pass NG instance to train(). + """ + # Create modules. + dl = RealFunctionDataLayer(n=100, batch_size=4) + fx = TaylorNet(dim=4) + loss = MSELoss() + + # Create the g0 graph. + g0 = NeuralGraph() + + # Activate the "g0 graph context" - all operations will be recorded to g0. 
+ with g0: + x, t = dl() + p = fx(x=x) + lss = loss(predictions=p, target=t) + # Bind the loss output. + g0.outputs["loss"] = lss + + # Instantiate an optimizer to perform the `train` action. + optimizer = PtActions() + + # Make sure user CANNOT pass training graph and tensors_to_optimize. + with pytest.raises(ValueError): + optimizer.train( + tensors_to_optimize=lss, + training_graph=g0, + optimization_params={"max_steps": 1, "lr": 0.0003}, + optimizer="sgd", + ) + + # But user can invoke "train" action using graph only. + optimizer.train(training_graph=g0, optimization_params={"max_steps": 1, "lr": 0.0003}, optimizer="sgd") diff --git a/tests/integration/test_asr_gradient_step_and_eval.py b/tests/integration/test_asr_gradient_step_and_eval.py new file mode 100644 index 000000000000..230ad2708be6 --- /dev/null +++ b/tests/integration/test_asr_gradient_step_and_eval.py @@ -0,0 +1,424 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +import os +import tarfile +from functools import partial +from unittest import TestCase + +import pytest +from ruamel.yaml import YAML + +import nemo +import nemo.collections.asr as nemo_asr +from nemo.core import EvaluatorCallback, SimpleLossLoggerCallback +from nemo.utils import logging + + +@pytest.mark.usefixtures("neural_factory") +class TestASRIntegrationPytorch(TestCase): + labels = [ + " ", + "a", + "b", + "c", + "d", + "e", + "f", + "g", + "h", + "i", + "j", + "k", + "l", + "m", + "n", + "o", + "p", + "q", + "r", + "s", + "t", + "u", + "v", + "w", + "x", + "y", + "z", + "'", + ] + manifest_filepath = os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/asr/an4_train.json")) + featurizer_config = { + 'window': 'hann', + 'dither': 1e-05, + 'normalize': 'per_feature', + 'frame_splicing': 1, + 'int_values': False, + 'window_stride': 0.01, + 'sample_rate': 16000, + 'features': 64, + 'n_fft': 512, + 'window_size': 0.02, + } + yaml = YAML(typ="safe") + + @classmethod + def setUpClass(cls) -> None: + super().setUpClass() + data_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/")) + logging.info("Looking up for test ASR data") + if not os.path.exists(os.path.join(data_folder, "asr")): + logging.info("Extracting ASR data to: {0}".format(os.path.join(data_folder, "asr"))) + tar = tarfile.open(os.path.join(data_folder, "asr.tar.gz"), "r:gz") + tar.extractall(path=data_folder) + tar.close() + else: + logging.info("ASR data found in: {0}".format(os.path.join(data_folder, "asr"))) + + @staticmethod + def print_and_log_loss(loss_tensor, loss_log_list): + """A helper function that is passed to SimpleLossLoggerCallback. It prints loss_tensors and appends to + the loss_log_list list. + + Args: + loss_tensor (NMTensor): tensor representing loss. 
Loss should be a scalar + loss_log_list (list): empty list + """ + logging.info(f'Train Loss: {str(loss_tensor[0].item())}') + loss_log_list.append(loss_tensor[0].item()) + + @pytest.mark.integration + def test_jasper_training(self): + """Integtaion test that instantiates a small Jasper model and tests training with the sample asr data. + Training is run for 3 forward and backward steps and asserts that loss after 3 steps is smaller than the loss + at the first step. + Note: Training is done with batch gradient descent as opposed to stochastic gradient descent due to CTC loss + """ + with open(os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/jasper_smaller.yaml"))) as file: + jasper_model_definition = self.yaml.load(file) + dl = nemo_asr.AudioToTextDataLayer( + # featurizer_config=self.featurizer_config, + manifest_filepath=self.manifest_filepath, + labels=self.labels, + batch_size=30, + ) + pre_process_params = { + 'frame_splicing': 1, + 'features': 64, + 'window_size': 0.02, + 'n_fft': 512, + 'dither': 1e-05, + 'window': 'hann', + 'sample_rate': 16000, + 'normalize': 'per_feature', + 'window_stride': 0.01, + } + preprocessing = nemo_asr.AudioToMelSpectrogramPreprocessor(**pre_process_params) + jasper_encoder = nemo_asr.JasperEncoder( + feat_in=jasper_model_definition['AudioToMelSpectrogramPreprocessor']['features'], + **jasper_model_definition['JasperEncoder'], + ) + jasper_decoder = nemo_asr.JasperDecoderForCTC(feat_in=1024, num_classes=len(self.labels)) + ctc_loss = nemo_asr.CTCLossNM(num_classes=len(self.labels)) + + # DAG + audio_signal, a_sig_length, transcript, transcript_len = dl() + processed_signal, p_length = preprocessing(input_signal=audio_signal, length=a_sig_length) + + encoded, encoded_len = jasper_encoder(audio_signal=processed_signal, length=p_length) + # logging.info(jasper_encoder) + log_probs = jasper_decoder(encoder_output=encoded) + loss = ctc_loss( + log_probs=log_probs, targets=transcript, input_length=encoded_len, target_length=transcript_len, + ) + + loss_list = [] + callback = SimpleLossLoggerCallback( + tensors=[loss], print_func=partial(self.print_and_log_loss, loss_log_list=loss_list), step_freq=1 + ) + + self.nf.train( + [loss], callbacks=[callback], optimizer="sgd", optimization_params={"max_steps": 3, "lr": 0.001}, + ) + self.nf.reset_trainer() + + # Assert that training loss went down + assert loss_list[-1] < loss_list[0] + + @pytest.mark.integration + def test_quartznet_training(self): + """Integtaion test that instantiates a small QuartzNet model and tests training with the sample asr data. + Training is run for 3 forward and backward steps and asserts that loss after 3 steps is smaller than the loss + at the first step. 
+ Note: Training is done with batch gradient descent as opposed to stochastic gradient descent due to CTC loss + """ + with open(os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/quartznet_test.yaml"))) as f: + quartz_model_definition = self.yaml.load(f) + dl = nemo_asr.AudioToTextDataLayer(manifest_filepath=self.manifest_filepath, labels=self.labels, batch_size=30) + pre_process_params = { + 'frame_splicing': 1, + 'features': 64, + 'window_size': 0.02, + 'n_fft': 512, + 'dither': 1e-05, + 'window': 'hann', + 'sample_rate': 16000, + 'normalize': 'per_feature', + 'window_stride': 0.01, + } + preprocessing = nemo_asr.AudioToMelSpectrogramPreprocessor(**pre_process_params) + jasper_encoder = nemo_asr.JasperEncoder( + feat_in=quartz_model_definition['AudioToMelSpectrogramPreprocessor']['features'], + **quartz_model_definition['JasperEncoder'], + ) + jasper_decoder = nemo_asr.JasperDecoderForCTC(feat_in=1024, num_classes=len(self.labels)) + ctc_loss = nemo_asr.CTCLossNM(num_classes=len(self.labels)) + + # DAG + audio_signal, a_sig_length, transcript, transcript_len = dl() + processed_signal, p_length = preprocessing(input_signal=audio_signal, length=a_sig_length) + + encoded, encoded_len = jasper_encoder(audio_signal=processed_signal, length=p_length) + log_probs = jasper_decoder(encoder_output=encoded) + loss = ctc_loss( + log_probs=log_probs, targets=transcript, input_length=encoded_len, target_length=transcript_len, + ) + + loss_list = [] + callback = SimpleLossLoggerCallback( + tensors=[loss], print_func=partial(self.print_and_log_loss, loss_log_list=loss_list), step_freq=1 + ) + + self.nf.train( + [loss], callbacks=[callback], optimizer="sgd", optimization_params={"max_steps": 3, "lr": 0.001}, + ) + self.nf.reset_trainer() + + # Assert that training loss went down + assert loss_list[-1] < loss_list[0] + + @pytest.mark.integration + def test_contextnet_ctc_training(self): + """Integtaion test that instantiates a small ContextNet model and tests training with the sample asr data. + Training is run for 3 forward and backward steps and asserts that loss after 3 steps is smaller than the loss + at the first step. 
+ Note: Training is done with batch gradient descent as opposed to stochastic gradient descent due to CTC loss + Checks SE-block with fixed context size and global context, residual_mode='stride_add' and 'stride_last' flags + """ + with open(os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/contextnet_32.yaml"))) as f: + contextnet_model_definition = self.yaml.load(f) + dl = nemo_asr.AudioToTextDataLayer(manifest_filepath=self.manifest_filepath, labels=self.labels, batch_size=30) + pre_process_params = { + 'frame_splicing': 1, + 'features': 80, + 'window_size': 0.025, + 'n_fft': 512, + 'dither': 1e-05, + 'window': 'hann', + 'sample_rate': 16000, + 'normalize': 'per_feature', + 'window_stride': 0.01, + } + preprocessing = nemo_asr.AudioToMelSpectrogramPreprocessor(**pre_process_params) + + spec_aug = nemo_asr.SpectrogramAugmentation(**contextnet_model_definition['SpectrogramAugmentation']) + + contextnet_encoder = nemo_asr.ContextNetEncoder( + feat_in=contextnet_model_definition['AudioToMelSpectrogramPreprocessor']['features'], + **contextnet_model_definition['ContextNetEncoder'], + ) + contextnet_decoder = nemo_asr.ContextNetDecoderForCTC(feat_in=32, hidden_size=16, num_classes=len(self.labels)) + ctc_loss = nemo_asr.CTCLossNM(num_classes=len(self.labels)) + + # DAG + audio_signal, a_sig_length, transcript, transcript_len = dl() + processed_signal, p_length = preprocessing(input_signal=audio_signal, length=a_sig_length) + + processed_signal = spec_aug(input_spec=processed_signal) + + encoded, encoded_len = contextnet_encoder(audio_signal=processed_signal, length=p_length) + log_probs = contextnet_decoder(encoder_output=encoded) + loss = ctc_loss( + log_probs=log_probs, targets=transcript, input_length=encoded_len, target_length=transcript_len, + ) + + loss_list = [] + callback = SimpleLossLoggerCallback( + tensors=[loss], print_func=partial(self.print_and_log_loss, loss_log_list=loss_list), step_freq=1 + ) + + self.nf.train( + [loss], callbacks=[callback], optimizer="sgd", optimization_params={"max_steps": 3, "lr": 0.001}, + ) + self.nf.reset_trainer() + + # Assert that training loss went down + assert loss_list[-1] < loss_list[0] + + @pytest.mark.integration + def test_stft_conv_training(self): + """Integtaion test that instantiates a small Jasper model and tests training with the sample asr data. + test_stft_conv_training tests the torch_stft path while test_jasper_training tests the torch.stft path inside + of AudioToMelSpectrogramPreprocessor. + Training is run for 3 forward and backward steps and asserts that loss after 3 steps is smaller than the loss + at the first step. 
+ Note: Training is done with batch gradient descent as opposed to stochastic gradient descent due to CTC loss + """ + with open(os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/jasper_smaller.yaml"))) as file: + jasper_model_definition = self.yaml.load(file) + dl = nemo_asr.AudioToTextDataLayer(manifest_filepath=self.manifest_filepath, labels=self.labels, batch_size=30) + pre_process_params = { + 'frame_splicing': 1, + 'features': 64, + 'window_size': 0.02, + 'n_fft': 512, + 'dither': 1e-05, + 'window': 'hann', + 'sample_rate': 16000, + 'normalize': 'per_feature', + 'window_stride': 0.01, + 'stft_conv': True, + } + preprocessing = nemo_asr.AudioToMelSpectrogramPreprocessor(**pre_process_params) + jasper_encoder = nemo_asr.JasperEncoder( + feat_in=jasper_model_definition['AudioToMelSpectrogramPreprocessor']['features'], + **jasper_model_definition['JasperEncoder'], + ) + jasper_decoder = nemo_asr.JasperDecoderForCTC(feat_in=1024, num_classes=len(self.labels)) + + ctc_loss = nemo_asr.CTCLossNM(num_classes=len(self.labels)) + + # DAG + audio_signal, a_sig_length, transcript, transcript_len = dl() + processed_signal, p_length = preprocessing(input_signal=audio_signal, length=a_sig_length) + + encoded, encoded_len = jasper_encoder(audio_signal=processed_signal, length=p_length) + # logging.info(jasper_encoder) + log_probs = jasper_decoder(encoder_output=encoded) + loss = ctc_loss( + log_probs=log_probs, targets=transcript, input_length=encoded_len, target_length=transcript_len, + ) + + loss_list = [] + callback = SimpleLossLoggerCallback( + tensors=[loss], print_func=partial(self.print_and_log_loss, loss_log_list=loss_list), step_freq=1 + ) + + self.nf.train( + [loss], callbacks=[callback], optimizer="sgd", optimization_params={"max_steps": 3, "lr": 0.001}, + ) + self.nf.reset_trainer() + + # Assert that training loss went down + assert loss_list[-1] < loss_list[0] + + @pytest.mark.integration + def test_quartznet_model_training(self): + """Integtaion test that instantiates a small Jasper model and tests training with the sample asr data. + test_stft_conv_training tests the torch_stft path while test_jasper_training tests the torch.stft path inside + of AudioToMelSpectrogramPreprocessor. + Training is run for 3 forward and backward steps and asserts that loss after 3 steps is smaller than the loss + at the first step. 
+ Note: Training is done with batch gradient descent as opposed to stochastic gradient descent due to CTC loss + """ + with open( + os.path.abspath(os.path.join(os.path.dirname(__file__), "../../examples/asr/configs/jasper_an4.yaml")) + ) as file: + model_definition = self.yaml.load(file) + dl = nemo_asr.AudioToTextDataLayer(manifest_filepath=self.manifest_filepath, labels=self.labels, batch_size=30) + model = nemo_asr.models.ASRConvCTCModel( + preprocessor_params=model_definition['AudioToMelSpectrogramPreprocessor'], + encoder_params=model_definition['JasperEncoder'], + decoder_params=model_definition['JasperDecoderForCTC'], + ) + model.train() + ctc_loss = nemo_asr.CTCLossNM(num_classes=len(self.labels)) + + # DAG + audio_signal, a_sig_length, transcript, transcript_len = dl() + log_probs, encoded_len = model(input_signal=audio_signal, length=a_sig_length) + loss = ctc_loss( + log_probs=log_probs, targets=transcript, input_length=encoded_len, target_length=transcript_len, + ) + + loss_list = [] + callback = nemo.core.SimpleLossLoggerCallback( + tensors=[loss], print_func=partial(self.print_and_log_loss, loss_log_list=loss_list), step_freq=1 + ) + + self.nf.train( + [loss], callbacks=[callback], optimizer="sgd", optimization_params={"max_steps": 3, "lr": 0.001}, + ) + self.nf.reset_trainer() + + # Assert that training loss went down + assert loss_list[-1] < loss_list[0] + + @pytest.mark.integration + @pytest.mark.skipduringci + def test_jasper_evaluation(self): + """Integration test that tests EvaluatorCallback and NeuralModuleFactory.eval(). This test is skipped during + CI as it is redundant with the Jenkins Jasper ASR CI tests. + """ + # Note this test still has no asserts, but rather checks that the current eval path works + with open(os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/jasper_smaller.yaml"))) as file: + jasper_model_definition = self.yaml.load(file) + dl = nemo_asr.AudioToTextDataLayer(manifest_filepath=self.manifest_filepath, labels=self.labels, batch_size=8) + pre_process_params = { + 'frame_splicing': 1, + 'features': 64, + 'window_size': 0.02, + 'n_fft': 512, + 'dither': 1e-05, + 'window': 'hann', + 'sample_rate': 16000, + 'normalize': 'per_feature', + 'window_stride': 0.01, + } + preprocessing = nemo_asr.AudioToMelSpectrogramPreprocessor(**pre_process_params) + jasper_encoder = nemo_asr.JasperEncoder( + feat_in=jasper_model_definition['AudioToMelSpectrogramPreprocessor']['features'], + **jasper_model_definition['JasperEncoder'], + ) + jasper_decoder = nemo_asr.JasperDecoderForCTC(feat_in=1024, num_classes=len(self.labels)) + ctc_loss = nemo_asr.CTCLossNM(num_classes=len(self.labels)) + greedy_decoder = nemo_asr.GreedyCTCDecoder() + # DAG + audio_signal, a_sig_length, transcript, transcript_len = dl() + processed_signal, p_length = preprocessing(input_signal=audio_signal, length=a_sig_length) + + encoded, encoded_len = jasper_encoder(audio_signal=processed_signal, length=p_length) + # logging.info(jasper_encoder) + log_probs = jasper_decoder(encoder_output=encoded) + loss = ctc_loss( + log_probs=log_probs, targets=transcript, input_length=encoded_len, target_length=transcript_len, + ) + predictions = greedy_decoder(log_probs=log_probs) + + from nemo.collections.asr.helpers import ( + process_evaluation_batch, + process_evaluation_epoch, + ) + + eval_callback = EvaluatorCallback( + eval_tensors=[loss, predictions, transcript, transcript_len], + user_iter_callback=lambda x, y: process_evaluation_batch(x, y, labels=self.labels), + 
user_epochs_done_callback=process_evaluation_epoch, + ) + # Instantiate an optimizer to perform `train` action + self.nf.eval(callbacks=[eval_callback]) diff --git a/tests/integration/test_integration_multidataset.py b/tests/integration/test_integration_multidataset.py new file mode 100644 index 000000000000..4eee92058e8b --- /dev/null +++ b/tests/integration/test_integration_multidataset.py @@ -0,0 +1,68 @@ +# ! /usr/bin/python +# -*- coding: utf-8 -*- + +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +import os +import shutil +from unittest import TestCase + +import pytest +import torch + +import nemo +from nemo.backends.pytorch.common import DataCombination +from nemo.core import ChannelType, NeuralType +from nemo.utils import logging + + +@pytest.mark.usefixtures("neural_factory") +class TestMultiDLIntegration(TestCase): + @classmethod + def setUpClass(cls) -> None: + super().setUpClass() + + @pytest.mark.integration + def test_pipeline(self): + batch_size = 4 + dataset_size_0 = 100 + dataset_size_1 = 100 + shuffle = False + dl_1 = nemo.backends.pytorch.tutorials.RealFunctionDataLayer(batch_size=batch_size, n=dataset_size_0) + dl_2 = nemo.backends.pytorch.tutorials.RealFunctionDataLayer(batch_size=batch_size, n=dataset_size_1) + + data_layer = nemo.backends.pytorch.common.MultiDataLayer( + data_layers=[dl_1, dl_2], batch_size=batch_size, shuffle=shuffle, combination_mode=DataCombination.ZIP + ) + x_0, y_0, x_1, y_1 = data_layer() + + trainable_module = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) + loss = nemo.backends.pytorch.tutorials.MSELoss() + combined_loss = nemo.backends.pytorch.common.losses.LossAggregatorNM(num_inputs=2) + pred_0 = trainable_module(x=x_0) + pred_1 = trainable_module(x=x_1) + l_0 = loss(predictions=pred_0, target=y_0) + l_1 = loss(predictions=pred_1, target=y_1) + total_loss = combined_loss(loss_1=l_0, loss_2=l_1) + + callback = nemo.core.SimpleLossLoggerCallback( + tensors=[total_loss], print_func=lambda x: logging.info(f'Train Loss: {str(x[0].item())}'), + ) + # Instantiate an optimizer to perform `train` action + optimizer = nemo.backends.pytorch.actions.PtActions() + optimizer.train( + tensors_to_optimize=[total_loss], optimizer="sgd", optimization_params={"lr": 0.0003, "max_steps": 2}, + ) diff --git a/tests/integration/test_speaker_recognition_gradient_step.py b/tests/integration/test_speaker_recognition_gradient_step.py new file mode 100644 index 000000000000..cf2535e9c9af --- /dev/null +++ b/tests/integration/test_speaker_recognition_gradient_step.py @@ -0,0 +1,115 @@ +# ! /usr/bin/python +# -*- coding: utf-8 -*- + +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= +import os +import shutil +import tarfile +from functools import partial + +import pytest +from ruamel.yaml import YAML + +import nemo +import nemo.collections.asr as nemo_asr +from nemo.utils import logging + + +@pytest.mark.usefixtures("neural_factory") +class TestSpeakerRecognitonPytorch: + manifest_filepath = os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/an4_speaker/train.json")) + yaml = YAML(typ="safe") + + @classmethod + def setup_class(cls) -> None: + data_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/")) + logging.info("Looking up for speaker related data") + if not os.path.exists(os.path.join(data_folder, "an4_speaker")): + logging.info("Extracting speaker related files to: {0}".format(os.path.join(data_folder, "an4_speaker"))) + tar = tarfile.open(os.path.join(data_folder, "an4_speaker.tar.gz"), "r:gz") + tar.extractall(path=data_folder) + tar.close() + else: + logging.info("Speech Command data found in: {0}".format(os.path.join(data_folder, "an4_speaker"))) + + @classmethod + def teardown_class(cls) -> None: + data_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/")) + logging.info("Looking up for test an4 data") + if os.path.exists(os.path.join(data_folder, "an4_speaker")): + shutil.rmtree(os.path.join(data_folder, "an4_speaker")) + + @staticmethod + def print_and_log_loss(loss_tensor, loss_log_list): + """A helper function that is passed to SimpleLossLoggerCallback. It prints loss_tensors and appends to + the loss_log_list list. + + Args: + loss_tensor (NMTensor): tensor representing loss. Loss should be a scalar + loss_log_list (list): empty list + """ + logging.info(f'Train Loss: {str(loss_tensor[0].item())}') + loss_log_list.append(loss_tensor[0].item()) + + @pytest.mark.integration + def test_quartznet_speaker_reco_training(self): + """Integtaion test that instantiates a small QuartzNet model for speaker recognition and tests training with the + sample an4 data. + Training is run for 3 forward and backward steps and asserts that loss after 3 steps is smaller than the loss + at the first step. 
+ """ + with open( + os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/quartznet_spkr_test.yaml")) + ) as file: + spkr_params = self.yaml.load(file) + dl = nemo_asr.AudioToSpeechLabelDataLayer( + manifest_filepath=self.manifest_filepath, labels=None, batch_size=10, + ) + sample_rate = 16000 + + preprocessing = nemo_asr.AudioToMelSpectrogramPreprocessor( + sample_rate=sample_rate, **spkr_params["AudioToMelSpectrogramPreprocessor"], + ) + jasper_encoder = nemo_asr.JasperEncoder(**spkr_params['JasperEncoder']) + jasper_decoder = nemo_asr.JasperDecoderForSpkrClass( + feat_in=spkr_params['JasperEncoder']['jasper'][-1]['filters'], + num_classes=dl.num_classes, + pool_mode=spkr_params['JasperDecoderForSpkrClass']['pool_mode'], + emb_sizes=spkr_params["JasperDecoderForSpkrClass"]["emb_sizes"].split(","), + ) + ce_loss = nemo_asr.CrossEntropyLossNM() + + # DAG + audio_signal, a_sig_length, targets, targets_len = dl() + processed_signal, p_length = preprocessing(input_signal=audio_signal, length=a_sig_length) + + encoded, encoded_len = jasper_encoder(audio_signal=processed_signal, length=p_length) + # logging.info(jasper_encoder) + log_probs, _ = jasper_decoder(encoder_output=encoded) + loss = ce_loss(logits=log_probs, labels=targets) + + loss_list = [] + callback = nemo.core.SimpleLossLoggerCallback( + tensors=[loss], print_func=partial(self.print_and_log_loss, loss_log_list=loss_list), step_freq=1 + ) + self.nf.random_seed = 42 + self.nf.train( + [loss], callbacks=[callback], optimizer="sgd", optimization_params={"max_steps": 4, "lr": 0.002}, + ) + self.nf.reset_trainer() + + # Assert that training loss went down + assert loss_list[-1] < loss_list[0] diff --git a/tests/integration/test_speechcommands_gradient_step_and_eval.py b/tests/integration/test_speechcommands_gradient_step_and_eval.py new file mode 100644 index 000000000000..c997ca98ad94 --- /dev/null +++ b/tests/integration/test_speechcommands_gradient_step_and_eval.py @@ -0,0 +1,196 @@ +# ! /usr/bin/python +# -*- coding: utf-8 -*- + +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================= +import os +import shutil +import tarfile +from functools import partial +from unittest import TestCase + +import pytest +from ruamel.yaml import YAML + +import nemo +import nemo.collections.asr as nemo_asr +from nemo.utils import logging + + +@pytest.mark.usefixtures("neural_factory") +class TestSpeechCommandsPytorch(TestCase): + labels = [ + "cat", + "dog", + ] + manifest_filepath = os.path.abspath( + os.path.join(os.path.dirname(__file__), "../data/speech_commands/train_manifest.json") + ) + featurizer_config = { + 'window': 'hann', + 'dither': 1e-05, + 'normalize': 'per_feature', + 'frame_splicing': 1, + 'int_values': False, + 'window_stride': 0.01, + 'sample_rate': 16000, + 'features': 64, + 'n_fft': 512, + 'window_size': 0.02, + } + yaml = YAML(typ="safe") + + @classmethod + def setUpClass(cls) -> None: + super().setUpClass() + data_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/")) + logging.info("Looking up for test speech command data") + if not os.path.exists(os.path.join(data_folder, "speech_commands")): + logging.info( + "Extracting speech commands data to: {0}".format(os.path.join(data_folder, "speech_commands")) + ) + tar = tarfile.open(os.path.join(data_folder, "speech_commands.tar.xz"), "r:xz") + tar.extractall(path=data_folder) + tar.close() + else: + logging.info("Speech Command data found in: {0}".format(os.path.join(data_folder, "speech_commands"))) + + @classmethod + def tearDownClass(cls) -> None: + super().tearDownClass() + data_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/")) + logging.info("Looking up for test ASR data") + if os.path.exists(os.path.join(data_folder, "speech_commands")): + shutil.rmtree(os.path.join(data_folder, "speech_commands")) + + @staticmethod + def print_and_log_loss(loss_tensor, loss_log_list): + """A helper function that is passed to SimpleLossLoggerCallback. It prints loss_tensors and appends to + the loss_log_list list. + + Args: + loss_tensor (NMTensor): tensor representing loss. Loss should be a scalar + loss_log_list (list): empty list + """ + logging.info(f'Train Loss: {str(loss_tensor[0].item())}') + loss_log_list.append(loss_tensor[0].item()) + + @pytest.mark.integration + def test_quartznet_speech_commands_training(self): + """Integtaion test that instantiates a small QuartzNet model for speech commands and tests training with the + sample asr data. + Training is run for 3 forward and backward steps and asserts that loss after 3 steps is smaller than the loss + at the first step. 
+ """ + with open( + os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/quartznet_speech_recognition.yaml")) + ) as file: + jasper_model_definition = self.yaml.load(file) + dl = nemo_asr.AudioToSpeechLabelDataLayer( + # featurizer_config=self.featurizer_config, + manifest_filepath=self.manifest_filepath, + labels=self.labels, + batch_size=6, + ) + pre_process_params = pre_process_params = { + 'frame_splicing': 1, + 'features': 64, + 'window_size': 0.02, + 'n_fft': 512, + 'dither': 1e-05, + 'window': 'hann', + 'sample_rate': 16000, + 'normalize': 'per_feature', + 'window_stride': 0.01, + } + preprocessing = nemo_asr.AudioToMelSpectrogramPreprocessor(**pre_process_params) + jasper_encoder = nemo_asr.JasperEncoder(**jasper_model_definition['JasperEncoder']) + jasper_decoder = nemo_asr.JasperDecoderForClassification( + feat_in=jasper_model_definition['JasperEncoder']['jasper'][-1]['filters'], num_classes=len(self.labels) + ) + ce_loss = nemo_asr.CrossEntropyLossNM() + + # DAG + audio_signal, a_sig_length, targets, targets_len = dl() + processed_signal, p_length = preprocessing(input_signal=audio_signal, length=a_sig_length) + + encoded, encoded_len = jasper_encoder(audio_signal=processed_signal, length=p_length) + # logging.info(jasper_encoder) + log_probs = jasper_decoder(encoder_output=encoded) + loss = ce_loss(logits=log_probs, labels=targets) + + loss_list = [] + callback = nemo.core.SimpleLossLoggerCallback( + tensors=[loss], print_func=partial(self.print_and_log_loss, loss_log_list=loss_list), step_freq=1 + ) + + self.nf.train( + [loss], callbacks=[callback], optimizer="sgd", optimization_params={"max_steps": 3, "lr": 0.003}, + ) + self.nf.reset_trainer() + + # Assert that training loss went down + assert loss_list[-1] < loss_list[0] + + @pytest.mark.integration + def test_quartznet_speech_commands_eval(self): + """Integration test that tests EvaluatorCallback and NeuralModuleFactory.eval(). 
+ """ + with open( + os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/quartznet_speech_recognition.yaml")) + ) as file: + jasper_model_definition = self.yaml.load(file) + dl = nemo_asr.AudioToSpeechLabelDataLayer( + manifest_filepath=self.manifest_filepath, labels=self.labels, batch_size=2, + ) + pre_process_params = { + 'frame_splicing': 1, + 'features': 64, + 'window_size': 0.02, + 'n_fft': 512, + 'dither': 1e-05, + 'window': 'hann', + 'sample_rate': 16000, + 'normalize': 'per_feature', + 'window_stride': 0.01, + } + preprocessing = nemo_asr.AudioToMelSpectrogramPreprocessor(**pre_process_params) + jasper_encoder = nemo_asr.JasperEncoder(**jasper_model_definition['JasperEncoder']) + jasper_decoder = nemo_asr.JasperDecoderForClassification( + feat_in=jasper_model_definition['JasperEncoder']['jasper'][-1]['filters'], num_classes=len(self.labels) + ) + ce_loss = nemo_asr.CrossEntropyLossNM() + + # DAG + audio_signal, a_sig_length, targets, targets_len = dl() + processed_signal, p_length = preprocessing(input_signal=audio_signal, length=a_sig_length) + + encoded, encoded_len = jasper_encoder(audio_signal=processed_signal, length=p_length) + # logging.info(jasper_encoder) + logits = jasper_decoder(encoder_output=encoded) + loss = ce_loss(logits=logits, labels=targets,) + + from nemo.collections.asr.helpers import ( + process_classification_evaluation_batch, + process_classification_evaluation_epoch, + ) + + eval_callback = nemo.core.EvaluatorCallback( + eval_tensors=[loss, logits, targets], + user_iter_callback=lambda x, y: process_classification_evaluation_batch(x, y, top_k=[1]), + user_epochs_done_callback=process_classification_evaluation_epoch, + ) + # Instantiate an optimizer to perform `train` action + self.nf.eval(callbacks=[eval_callback]) diff --git a/tests/integration/test_tts_gradient_step.py b/tests/integration/test_tts_gradient_step.py new file mode 100644 index 000000000000..8b8e500d81f4 --- /dev/null +++ b/tests/integration/test_tts_gradient_step.py @@ -0,0 +1,327 @@ +# ! /usr/bin/python +# -*- coding: utf-8 -*- + +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================= + +import os +import pathlib +import tarfile +from functools import partial +from unittest import TestCase + +import numpy as np +import pytest + +import nemo.collections.asr as nemo_asr +import nemo.collections.tts as nemo_tts +from nemo.backends.pytorch.actions import PtActions +from nemo.core import SimpleLossLoggerCallback +from nemo.utils import logging + + +@pytest.mark.usefixtures("neural_factory") +class TestTTSPytorch(TestCase): + labels = [ + " ", + "a", + "b", + "c", + "d", + "e", + "f", + "g", + "h", + "i", + "j", + "k", + "l", + "m", + "n", + "o", + "p", + "q", + "r", + "s", + "t", + "u", + "v", + "w", + "x", + "y", + "z", + "'", + ] + manifest_filepath = "tests/data/asr/an4_train.json" + + def setUp(self) -> None: + super().setUp() + data_folder = "tests/data/" + logging.info("Looking up for test speech data") + if not os.path.exists(data_folder + "asr"): + logging.info("Extracting speech data to: {0}".format(data_folder + "asr")) + tar = tarfile.open("tests/data/asr.tar.gz", "r:gz") + tar.extractall(path=data_folder) + tar.close() + else: + logging.info("speech data found in: {0}".format(data_folder + "asr")) + + @staticmethod + def print_and_log_loss(loss_tensor, loss_log_list): + """A helper function that is passed to SimpleLossLoggerCallback. It prints loss_tensors and appends to + the loss_log_list list. + + Args: + loss_tensor (NMTensor): tensor representing loss. Loss should be a scalar + loss_log_list (list): empty list + """ + logging.info(f'Train Loss: {str(loss_tensor[0].item())}') + loss_log_list.append(loss_tensor[0].item()) + + @pytest.mark.integration + @pytest.mark.run_only_on('GPU') + def test_tacotron2_training(self): + """Integtaion test that instantiates a smaller Tacotron2 model and tests training with the sample asr data. + Training is run for 3 forward and backward steps and asserts that loss after 3 steps is smaller than the loss + at the first step. 
+ """ + data_layer = nemo_asr.AudioToTextDataLayer( + manifest_filepath=self.manifest_filepath, labels=self.labels, batch_size=4 + ) + preprocessing = nemo_asr.AudioToMelSpectrogramPreprocessor( + window_size=None, + window_stride=None, + n_window_size=512, + n_window_stride=128, + normalize=None, + preemph=None, + dither=0, + mag_power=1.0, + pad_value=-11.52, + log_zero_guard_type="clamp", + log_zero_guard_value=1e-05, + ) + text_embedding = nemo_tts.TextEmbedding(len(self.labels), 256) + t2_enc = nemo_tts.Tacotron2Encoder(encoder_n_convolutions=2, encoder_kernel_size=5, encoder_embedding_dim=256) + t2_dec = nemo_tts.Tacotron2Decoder( + n_mel_channels=64, + n_frames_per_step=1, + encoder_embedding_dim=256, + gate_threshold=0.5, + prenet_dim=128, + max_decoder_steps=1000, + decoder_rnn_dim=512, + p_decoder_dropout=0.1, + p_attention_dropout=0.1, + attention_rnn_dim=512, + attention_dim=64, + attention_location_n_filters=16, + attention_location_kernel_size=15, + ) + t2_postnet = nemo_tts.Tacotron2Postnet( + n_mel_channels=64, postnet_embedding_dim=256, postnet_kernel_size=5, postnet_n_convolutions=3 + ) + t2_loss = nemo_tts.Tacotron2Loss() + makegatetarget = nemo_tts.MakeGate() + + # DAG + audio, audio_len, transcript, transcript_len = data_layer() + spec_target, spec_target_len = preprocessing(input_signal=audio, length=audio_len) + + transcript_embedded = text_embedding(char_phone=transcript) + transcript_encoded = t2_enc(char_phone_embeddings=transcript_embedded, embedding_length=transcript_len) + mel_decoder, gate, _ = t2_dec( + char_phone_encoded=transcript_encoded, encoded_length=transcript_len, mel_target=spec_target + ) + mel_postnet = t2_postnet(mel_input=mel_decoder) + gate_target = makegatetarget(mel_target=spec_target, target_len=spec_target_len) + loss_t = t2_loss( + mel_out=mel_decoder, + mel_out_postnet=mel_postnet, + gate_out=gate, + mel_target=spec_target, + gate_target=gate_target, + target_len=spec_target_len, + seq_len=audio_len, + ) + loss_list = [] + + callback = SimpleLossLoggerCallback( + tensors=[loss_t], print_func=partial(self.print_and_log_loss, loss_log_list=loss_list), step_freq=1 + ) + # Instantiate an optimizer to perform `train` action + optimizer = PtActions() + optimizer.train( + [loss_t], callbacks=[callback], optimizer="sgd", optimization_params={"max_steps": 3, "lr": 0.01} + ) + + # Assert that training loss went down + assert loss_list[-1] < loss_list[0] + + @pytest.mark.integration + @pytest.mark.run_only_on('GPU') + def test_waveglow_training(self): + """Integtaion test that instantiates a smaller WaveGlow model and tests training with the sample asr data. + Training is run for 3 forward and backward steps and asserts that loss after 3 steps is smaller than the loss + at the first step. 
+ """ + data_layer = nemo_tts.AudioDataLayer( + manifest_filepath=self.manifest_filepath, n_segments=4000, batch_size=4, sample_rate=16000 + ) + preprocessing = nemo_asr.AudioToMelSpectrogramPreprocessor( + window_size=None, + window_stride=None, + n_window_size=512, + n_window_stride=128, + normalize=None, + preemph=None, + dither=0, + mag_power=1.0, + pad_value=-11.52, + ) + waveglow = nemo_tts.WaveGlowNM( + n_mel_channels=64, + n_flows=6, + n_group=4, + n_early_every=4, + n_early_size=2, + n_wn_layers=4, + n_wn_channels=256, + wn_kernel_size=3, + sample_rate=16000, + ) + waveglow_loss = nemo_tts.WaveGlowLoss(sample_rate=16000) + + # DAG + audio, audio_len, = data_layer() + spec_target, _ = preprocessing(input_signal=audio, length=audio_len) + + z, log_s_list, log_det_W_list = waveglow(mel_spectrogram=spec_target, audio=audio) + loss_t = waveglow_loss(z=z, log_s_list=log_s_list, log_det_W_list=log_det_W_list) + + loss_list = [] + callback = SimpleLossLoggerCallback( + tensors=[loss_t], print_func=partial(self.print_and_log_loss, loss_log_list=loss_list), step_freq=1 + ) + # Instantiate an optimizer to perform `train` action + optimizer = PtActions() + optimizer.train( + [loss_t], callbacks=[callback], optimizer="sgd", optimization_params={"max_steps": 3, "lr": 0.01} + ) + + # Assert that training loss went down + assert loss_list[-1] < loss_list[0] + + @pytest.mark.integration + def test_fastspeech(self): + """Integtaion test that instantiates a FastSpeech model and tests training with the sample asr data. + Note instantiating the FastSpeech model additionally requires creating speech durations which additionally + tests NeuralModuleFactory.infer(). + Training is run for 3 forward and backward steps and asserts that loss after 3 steps is smaller than the loss + at the first step. + """ + data_layer = nemo_asr.AudioToTextDataLayer( + manifest_filepath=self.manifest_filepath, + labels=self.labels, + batch_size=1, + shuffle=False, + sample_rate=16000, + ) + + data_preprocessor = nemo_asr.AudioToMelSpectrogramPreprocessor( + window_size=None, + window_stride=None, + n_window_size=512, + n_window_stride=128, + normalize=None, + preemph=None, + dither=0, + mag_power=1.0, + pad_value=-11.52, + pad_to=0, + log_zero_guard_type="clamp", + log_zero_guard_value=1e-05, + ) + + data = data_layer() + spec, spec_length = data_preprocessor(input_signal=data.audio_signal, length=data.a_sig_length) + + # Creates and saves durations as numpy arrays. 
+ durs_dir = pathlib.Path('tests/data/asr/durs') + durs_dir.mkdir(exist_ok=True) + result = self.nf.infer([data.transcripts, data.transcript_length, spec_length, spec]) + k = -1 + for text, text_len, mel_len, mel in zip(result[0], result[1], result[2], result[3]): + text = text.cpu().numpy()[0][: text_len.cpu().numpy()[0]] + dur = np.zeros(text.shape[0], dtype=np.long) + dur_sum = mel_len.cpu().numpy()[0] + 1 # TODO: delete `+1` + dur[0] = dur_sum - 4 + dur[1] = 4 + k += 1 + np.save(durs_dir / f'{k}.npy', dur, allow_pickle=False) + + data_layer = nemo_tts.FastSpeechDataLayer( + manifest_filepath=self.manifest_filepath, + durs_dir=durs_dir, + labels=self.labels, + batch_size=4, + sample_rate=16000, + ) + + fastspeech = nemo_tts.FastSpeech( + decoder_output_size=384, + n_mels=64, + max_seq_len=2048, + word_vec_dim=384, + encoder_n_layer=6, + encoder_head=2, + encoder_conv1d_filter_size=1536, + decoder_n_layer=6, + decoder_head=2, + decoder_conv1d_filter_size=1536, + fft_conv1d_kernel=3, + fft_conv1d_padding=1, + encoder_output_size=384, + duration_predictor_filter_size=256, + duration_predictor_kernel_size=3, + dropout=0.1, + alpha=1.0, + n_src_vocab=len(self.labels), + pad_id=0, + ) + + loss = nemo_tts.FastSpeechLoss() + + data = data_layer() + mel_true, _ = data_preprocessor(input_signal=data.audio, length=data.audio_len) + mel_pred, dur_pred = fastspeech( + text=data.text, text_pos=data.text_pos, mel_true=mel_true, dur_true=data.dur_true, + ) + loss_t = loss( + mel_true=mel_true, mel_pred=mel_pred, dur_true=data.dur_true, dur_pred=dur_pred, text_pos=data.text_pos, + ) + + loss_list = [] + callback = SimpleLossLoggerCallback( + tensors=[loss_t], print_func=partial(self.print_and_log_loss, loss_log_list=loss_list), step_freq=1 + ) + # Instantiate an optimizer to perform `train` action + optimizer = PtActions() + optimizer.train( + [loss_t], callbacks=[callback], optimizer="sgd", optimization_params={"max_steps": 3, "lr": 0.0003} + ) + + # Assert that training loss went down + assert loss_list[-1] < loss_list[0] diff --git a/tests/nlp/__init__.py b/tests/nlp/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/tests/core/test_infer.py b/tests/system/test_infer.py similarity index 74% rename from tests/core/test_infer.py rename to tests/system/test_infer.py index d9b11a3997da..660929d6b6f4 100644 --- a/tests/core/test_infer.py +++ b/tests/system/test_infer.py @@ -16,12 +16,15 @@ # limitations under the License. 
# ============================================================================= +from unittest import TestCase + +import pytest import torch import nemo from nemo.backends.pytorch.nm import NonTrainableNM -from nemo.core.neural_types import * -from tests.common_setup import NeMoUnitTest +from nemo.core.neural_types import AxisKind, AxisType, ChannelType, NeuralType +from nemo.utils.decorators import add_port_docs class AddsTen(NonTrainableNM): @@ -29,11 +32,13 @@ def __init__(self): super().__init__() @property + @add_port_docs() def input_ports(self): # return {"mod_in": NeuralType({0: AxisType(BatchTag), 1: AxisType(BaseTag, dim=1)})} return {"mod_in": NeuralType((AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension, 1)), ChannelType())} @property + @add_port_docs() def output_ports(self): # return {"mod_out": NeuralType({0: AxisType(BatchTag), 1: AxisType(BaseTag, dim=1)})} return {"mod_out": NeuralType((AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension, 1)), ChannelType())} @@ -47,10 +52,12 @@ def __init__(self): super().__init__() @property + @add_port_docs() def input_ports(self): return {"mod_in": NeuralType((AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension, 1)), ChannelType())} @property + @add_port_docs() def output_ports(self): return {"mod_out": NeuralType((AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension, 1)), ChannelType())} @@ -58,12 +65,15 @@ def forward(self, mod_in): return mod_in - 10 -class TestInfer(NeMoUnitTest): - def test_infer_caching(self): - neural_factory = nemo.core.neural_factory.NeuralModuleFactory( - backend=nemo.core.Backend.PyTorch, create_tb_writer=False - ) +@pytest.mark.usefixtures("neural_factory") +class TestInfer(TestCase): + def setUp(self) -> None: + """ Re-instantiates Neural Factory for every test. """ + # Re-initialize the default Neural Factory - on the indicated device. 
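+        # The placement (CPU/GPU) is inherited from the factory presumably created by the "neural_factory" fixture.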
+ self.nf = nemo.core.NeuralModuleFactory(placement=self.nf.placement) + @pytest.mark.system + def test_infer_caching(self): data_source = nemo.backends.pytorch.common.ZerosDataLayer( size=1, dtype=torch.FloatTensor, @@ -80,18 +90,16 @@ def test_infer_caching(self): twenty_tensor = addten(mod_in=ten_tensor) thirty_tensor = addten(mod_in=twenty_tensor) - evaluated_tensors = neural_factory.infer(tensors=[twenty_tensor, thirty_tensor], verbose=False, cache=True) + evaluated_tensors = self.nf.infer(tensors=[twenty_tensor, thirty_tensor], verbose=False, cache=True) self.assertEqual(evaluated_tensors[0][0].squeeze().data, 20) self.assertEqual(evaluated_tensors[1][0].squeeze().data, 30) new_ten_tensor = minusten(mod_in=twenty_tensor) - evaluated_tensors = neural_factory.infer(tensors=[new_ten_tensor], verbose=False, use_cache=True) + evaluated_tensors = self.nf.infer(tensors=[new_ten_tensor], verbose=False, use_cache=True) self.assertEqual(evaluated_tensors[0][0].squeeze().data, 10) + @pytest.mark.system def test_infer_errors(self): - neural_factory = nemo.core.neural_factory.NeuralModuleFactory( - backend=nemo.core.Backend.PyTorch, create_tb_writer=False - ) data_source = nemo.backends.pytorch.common.ZerosDataLayer( size=1, @@ -110,21 +118,19 @@ def test_infer_errors(self): thirty_tensor = addten(mod_in=twenty_tensor) with self.assertRaisesRegex(ValueError, "use_cache was set, but cache was empty"): - evaluated_tensors = neural_factory.infer( - tensors=[twenty_tensor, thirty_tensor], verbose=False, use_cache=True - ) + evaluated_tensors = self.nf.infer(tensors=[twenty_tensor, thirty_tensor], verbose=False, use_cache=True) new_ten_tensor = minusten(mod_in=twenty_tensor) - evaluated_tensors = neural_factory.infer(tensors=[new_ten_tensor], verbose=False, cache=True) + evaluated_tensors = self.nf.infer(tensors=[new_ten_tensor], verbose=False, cache=True) with self.assertRaisesRegex(ValueError, "cache was set but was not empty"): - evaluated_tensors = neural_factory.infer(tensors=[twenty_tensor, thirty_tensor], verbose=False, cache=True) + evaluated_tensors = self.nf.infer(tensors=[twenty_tensor, thirty_tensor], verbose=False, cache=True) - neural_factory.clear_cache() - evaluated_tensors = neural_factory.infer(tensors=[new_ten_tensor], verbose=False, cache=True) + self.nf.clear_cache() + evaluated_tensors = self.nf.infer(tensors=[new_ten_tensor], verbose=False, cache=True) with self.assertRaisesRegex(ValueError, "cache and use_cache were both set."): - evaluated_tensors = neural_factory.infer( + evaluated_tensors = self.nf.infer( tensors=[twenty_tensor, thirty_tensor], verbose=False, cache=True, use_cache=True ) self.assertEqual(evaluated_tensors[0][0].squeeze().data, 10) diff --git a/tests/core/test_pytorch_trainers.py b/tests/system/test_pytorch_trainers.py similarity index 61% rename from tests/core/test_pytorch_trainers.py rename to tests/system/test_pytorch_trainers.py index 8f32aff85290..f3f442a06444 100644 --- a/tests/core/test_pytorch_trainers.py +++ b/tests/system/test_pytorch_trainers.py @@ -16,16 +16,19 @@ # limitations under the License. 
# ============================================================================= -import nemo -from tests.common_setup import NeMoUnitTest +from unittest import TestCase + +import pytest -logging = nemo.logging +import nemo -class TestPytorchTrainers(NeMoUnitTest): +@pytest.mark.usefixtures("neural_factory") +class TestPytorchTrainers(TestCase): + @pytest.mark.system def test_simple_train(self): - logging.info("Simplest train test") - data_source = nemo.backends.pytorch.tutorials.RealFunctionDataLayer(n=10000, batch_size=128) + """ Simplest train test """ + data_source = nemo.backends.pytorch.tutorials.RealFunctionDataLayer(n=100, batch_size=32) trainable_module = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) loss = nemo.backends.pytorch.tutorials.MSELoss() x, y = data_source() @@ -34,32 +37,13 @@ def test_simple_train(self): optimizer = nemo.backends.pytorch.actions.PtActions() optimizer.train( - tensors_to_optimize=[loss_tensor], optimizer="sgd", optimization_params={"lr": 0.0003, "num_epochs": 1}, - ) - - def test_simple_train_named_output(self): - logging.info('Simplest train test with using named output.') - data_source = nemo.backends.pytorch.tutorials.RealFunctionDataLayer(n=10000, batch_size=128,) - trainable_module = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) - loss = nemo.backends.pytorch.tutorials.MSELoss() - - data = data_source() - self.assertEqual( - first=type(data).__name__, - second='RealFunctionDataLayerOutput', - msg='Check output class naming coherence.', - ) - y_pred = trainable_module(x=data.x) - loss_tensor = loss(predictions=y_pred, target=data.y) - - optimizer = nemo.backends.pytorch.actions.PtActions() - optimizer.train( - tensors_to_optimize=[loss_tensor], optimizer="sgd", optimization_params={"lr": 0.0003, "num_epochs": 1}, + tensors_to_optimize=[loss_tensor], optimizer="sgd", optimization_params={"lr": 0.0003, "num_epochs": 2}, ) + @pytest.mark.system def test_simple_chained_train(self): - logging.info("Chained train test") - data_source = nemo.backends.pytorch.tutorials.RealFunctionDataLayer(n=10000, batch_size=32) + """ Chained train test """ + data_source = nemo.backends.pytorch.tutorials.RealFunctionDataLayer(n=100, batch_size=32) trainable_module1 = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) trainable_module2 = nemo.backends.pytorch.tutorials.TaylorNet(dim=2) trainable_module3 = nemo.backends.pytorch.tutorials.TaylorNet(dim=2) @@ -72,5 +56,5 @@ def test_simple_chained_train(self): optimizer = nemo.backends.pytorch.actions.PtActions() optimizer.train( - tensors_to_optimize=[loss_tensor], optimizer="sgd", optimization_params={"lr": 0.0003, "num_epochs": 1}, + tensors_to_optimize=[loss_tensor], optimizer="sgd", optimization_params={"lr": 0.0003, "num_epochs": 2}, ) diff --git a/tests/tts/__init__.py b/tests/tts/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/tests/tts/test_tts.py b/tests/tts/test_tts.py deleted file mode 100644 index 682a267f2e80..000000000000 --- a/tests/tts/test_tts.py +++ /dev/null @@ -1,188 +0,0 @@ -# ! /usr/bin/python -# -*- coding: utf-8 -*- - -# Copyright 2020 NVIDIA. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================= - -import os -import tarfile - -import nemo -import nemo.collections.asr as nemo_asr -import nemo.collections.tts as nemo_tts -from tests.common_setup import NeMoUnitTest - -logging = nemo.logging - - -class TestTTSPytorch(NeMoUnitTest): - labels = [ - " ", - "a", - "b", - "c", - "d", - "e", - "f", - "g", - "h", - "i", - "j", - "k", - "l", - "m", - "n", - "o", - "p", - "q", - "r", - "s", - "t", - "u", - "v", - "w", - "x", - "y", - "z", - "'", - ] - manifest_filepath = "tests/data/asr/an4_train.json" - - def setUp(self) -> None: - super().setUp() - data_folder = "tests/data/" - logging.info("Looking up for test speech data") - if not os.path.exists(data_folder + "asr"): - logging.info("Extracting speech data to: {0}".format(data_folder + "asr")) - tar = tarfile.open("tests/data/asr.tar.gz", "r:gz") - tar.extractall(path=data_folder) - tar.close() - else: - logging.info("speech data found in: {0}".format(data_folder + "asr")) - - def test_tacotron2_training(self): - data_layer = nemo_asr.AudioToTextDataLayer( - manifest_filepath=self.manifest_filepath, labels=self.labels, batch_size=4, - ) - preprocessing = nemo_asr.AudioToMelSpectrogramPreprocessor( - window_size=None, - window_stride=None, - n_window_size=512, - n_window_stride=128, - normalize=None, - preemph=None, - dither=0, - mag_power=1.0, - pad_value=-11.52, - ) - text_embedding = nemo_tts.TextEmbedding(len(self.labels), 256) - t2_enc = nemo_tts.Tacotron2Encoder(encoder_n_convolutions=2, encoder_kernel_size=5, encoder_embedding_dim=256,) - t2_dec = nemo_tts.Tacotron2Decoder( - n_mel_channels=64, - n_frames_per_step=1, - encoder_embedding_dim=256, - gate_threshold=0.5, - prenet_dim=128, - max_decoder_steps=1000, - decoder_rnn_dim=512, - p_decoder_dropout=0.1, - p_attention_dropout=0.1, - attention_rnn_dim=512, - attention_dim=64, - attention_location_n_filters=16, - attention_location_kernel_size=15, - ) - t2_postnet = nemo_tts.Tacotron2Postnet( - n_mel_channels=64, postnet_embedding_dim=256, postnet_kernel_size=5, postnet_n_convolutions=3, - ) - t2_loss = nemo_tts.Tacotron2Loss() - makegatetarget = nemo_tts.MakeGate() - - # DAG - audio, audio_len, transcript, transcript_len = data_layer() - spec_target, spec_target_len = preprocessing(input_signal=audio, length=audio_len) - - transcript_embedded = text_embedding(char_phone=transcript) - transcript_encoded = t2_enc(char_phone_embeddings=transcript_embedded, embedding_length=transcript_len,) - mel_decoder, gate, _ = t2_dec( - char_phone_encoded=transcript_encoded, encoded_length=transcript_len, mel_target=spec_target, - ) - mel_postnet = t2_postnet(mel_input=mel_decoder) - gate_target = makegatetarget(mel_target=spec_target, target_len=spec_target_len) - loss_t = t2_loss( - mel_out=mel_decoder, - mel_out_postnet=mel_postnet, - gate_out=gate, - mel_target=spec_target, - gate_target=gate_target, - target_len=spec_target_len, - seq_len=audio_len, - ) - - callback = nemo.core.SimpleLossLoggerCallback( - tensors=[loss_t], print_func=lambda x: logging.info(f'Train Loss: {str(x[0].item())}'), - ) - # 
Instantiate an optimizer to perform `train` action - neural_factory = nemo.core.NeuralModuleFactory( - backend=nemo.core.Backend.PyTorch, local_rank=None, create_tb_writer=False, - ) - optimizer = neural_factory.get_trainer() - optimizer.train( - [loss_t], callbacks=[callback], optimizer="sgd", optimization_params={"num_epochs": 10, "lr": 0.0003}, - ) - - def test_waveglow_training(self): - data_layer = nemo_tts.AudioDataLayer(manifest_filepath=self.manifest_filepath, n_segments=4000, batch_size=4,) - preprocessing = nemo_asr.AudioToMelSpectrogramPreprocessor( - window_size=None, - window_stride=None, - n_window_size=512, - n_window_stride=128, - normalize=None, - preemph=None, - dither=0, - mag_power=1.0, - pad_value=-11.52, - ) - waveglow = nemo_tts.WaveGlowNM( - n_mel_channels=64, - n_flows=6, - n_group=4, - n_early_every=4, - n_early_size=2, - n_wn_layers=4, - n_wn_channels=256, - wn_kernel_size=3, - ) - waveglow_loss = nemo_tts.WaveGlowLoss() - - # DAG - audio, audio_len, = data_layer() - spec_target, _ = preprocessing(input_signal=audio, length=audio_len) - - z, log_s_list, log_det_W_list = waveglow(mel_spectrogram=spec_target, audio=audio) - loss_t = waveglow_loss(z=z, log_s_list=log_s_list, log_det_W_list=log_det_W_list) - - callback = nemo.core.SimpleLossLoggerCallback( - tensors=[loss_t], print_func=lambda x: logging.info(f'Train Loss: {str(x[0].item())}'), - ) - # Instantiate an optimizer to perform `train` action - neural_factory = nemo.core.NeuralModuleFactory( - backend=nemo.core.Backend.PyTorch, local_rank=None, create_tb_writer=False, - ) - optimizer = neural_factory.get_trainer() - optimizer.train( - [loss_t], callbacks=[callback], optimizer="sgd", optimization_params={"num_epochs": 10, "lr": 0.0003}, - ) diff --git a/tests/asr/__init__.py b/tests/unit/__init__.py similarity index 100% rename from tests/asr/__init__.py rename to tests/unit/__init__.py diff --git a/tests/core/__init__.py b/tests/unit/core/__init__.py similarity index 100% rename from tests/core/__init__.py rename to tests/unit/core/__init__.py diff --git a/tests/unit/core/neural_graph/test_neural_graph_binding.py b/tests/unit/core/neural_graph/test_neural_graph_binding.py new file mode 100644 index 000000000000..6ca5a10bf5d7 --- /dev/null +++ b/tests/unit/core/neural_graph/test_neural_graph_binding.py @@ -0,0 +1,161 @@ +# ! /usr/bin/python +# -*- coding: utf-8 -*- +# ============================================================================= +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# =============================================================================
+
+import pytest
+
+from nemo.backends.pytorch.tutorials import MSELoss, RealFunctionDataLayer, TaylorNet
+from nemo.core import NeuralGraph, OperationMode
+from nemo.core.neural_types import NeuralTypeComparisonResult
+from nemo.utils.neural_graph.graph_outputs import GraphOutputs
+
+
+@pytest.mark.usefixtures("neural_factory")
+class TestGraphOutputs:
+    @pytest.mark.unit
+    def test_graph_outputs_binding1(self):
+        # Create modules.
+        data_source = RealFunctionDataLayer(n=100, batch_size=1)
+        tn = TaylorNet(dim=4)
+        loss = MSELoss()
+
+        with NeuralGraph() as g:
+            # Create the graph by connecting the modules.
+            x, y = data_source()
+            y_pred = tn(x=x)
+            lss = loss(predictions=y_pred, target=y)
+
+        # Test default binding.
+        bound_outputs = GraphOutputs(g.tensors)
+
+        bound_outputs.bind([x, y])
+        bound_outputs.bind([y_pred])
+        bound_outputs.bind([lss])
+
+        # Delete not allowed.
+        with pytest.raises(TypeError):
+            del bound_outputs["loss"]
+
+        assert len(bound_outputs) == 4
+        assert len(bound_outputs.tensors) == 4
+        assert len(bound_outputs.tensor_list) == 4
+
+        defs = bound_outputs.definitions
+        assert defs["x"].compare(data_source.output_ports["x"]) == NeuralTypeComparisonResult.SAME
+        assert defs["y"].compare(data_source.output_ports["y"]) == NeuralTypeComparisonResult.SAME
+        assert defs["y_pred"].compare(tn.output_ports["y_pred"]) == NeuralTypeComparisonResult.SAME
+        assert defs["loss"].compare(loss.output_ports["loss"]) == NeuralTypeComparisonResult.SAME
+
+        with pytest.raises(KeyError):
+            _ = defs["lss"]
+
+        # Bound manually.
+        bound_outputs["my_prediction"] = y_pred
+        bound_outputs["my_loss"] = lss
+
+        # Delete not allowed.
+        with pytest.raises(TypeError):
+            del bound_outputs["my_prediction"]
+
+        assert len(bound_outputs) == 2
+        defs = bound_outputs.definitions
+        assert defs["my_prediction"].compare(tn.output_ports["y_pred"]) == NeuralTypeComparisonResult.SAME
+        assert defs["my_loss"].compare(loss.output_ports["loss"]) == NeuralTypeComparisonResult.SAME
+
+        with pytest.raises(KeyError):
+            _ = defs["x"]
+
+    @pytest.mark.unit
+    def test_graph_outputs_binding2(self):
+        # Create modules.
+        data_source = RealFunctionDataLayer(n=100, batch_size=1, name="tgo2_ds")
+        tn = TaylorNet(dim=4, name="tgo2_tn")
+        loss = MSELoss(name="tgo2_loss")
+
+        # Test default binding.
+        with NeuralGraph(operation_mode=OperationMode.training) as g1:
+            # Create the graph by connecting the modules.
+            x, y = data_source()
+            y_pred = tn(x=x)
+            lss = loss(predictions=y_pred, target=y)
+
+        assert len(g1.outputs) == 4
+        # Test ports.
+        for (module, port, tensor) in [
+            (data_source, "x", x),
+            (data_source, "y", y),
+            (tn, "y_pred", y_pred),
+            (loss, "loss", lss),
+        ]:
+            # Compare definitions - from outputs.
+            assert g1.outputs[port].ntype.compare(module.output_ports[port]) == NeuralTypeComparisonResult.SAME
+            # Compare definitions - from output_ports.
+            assert g1.output_ports[port].compare(module.output_ports[port]) == NeuralTypeComparisonResult.SAME
+            # Compare definitions - from output_tensors.
+            assert g1.output_tensors[port].compare(module.output_ports[port]) == NeuralTypeComparisonResult.SAME
+            # Make sure that tensor was bound, i.e. input refers to the same object instance!
+            assert g1.output_tensors[port] is tensor
+
+        # Test manual binding.
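+        # Manual binding overrides the default bindings, so after the two assignments below only those two names remain.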
+ g1.outputs["my_prediction"] = y_pred + g1.outputs["my_loss"] = lss + + assert len(g1.outputs) == 2 + assert g1.output_tensors["my_prediction"].compare(tn.output_ports["y_pred"]) == NeuralTypeComparisonResult.SAME + assert g1.output_tensors["my_loss"].compare(loss.output_ports["loss"]) == NeuralTypeComparisonResult.SAME + + # Finally, make sure that the user cannot "bind" "output_ports"! + with pytest.raises(TypeError): + g1.output_ports["my_prediction"] = y_pred + + @pytest.mark.unit + def test_graph_inputs_binding1_default(self): + # Create modules. + tn = TaylorNet(dim=4, name="tgi1_tn") + loss = MSELoss(name="tgi1_loss") + + # Test default binding. + with NeuralGraph() as g1: + y_pred = tn(x=g1) + lss = loss(predictions=y_pred, target=g1) + + assert len(g1.inputs) == 2 + assert g1.input_ports["x"].compare(tn.input_ports["x"]) == NeuralTypeComparisonResult.SAME + assert g1.input_ports["target"].compare(loss.input_ports["target"]) == NeuralTypeComparisonResult.SAME + + @pytest.mark.unit + def test_graph_inputs_binding2_manual(self): + # Create modules. + tn = TaylorNet(dim=4, name="tgi2_tn") + loss = MSELoss(name="tgi2_loss") + + # Test "manual" binding. + with NeuralGraph() as g1: + # Bind the "x" input to tn. + g1.inputs["i"] = tn.input_ports["x"] + y_pred = tn(x=g1.inputs["i"]) + # Bing the "target" input to loss. + g1.inputs["t"] = loss.input_ports["target"] + lss = loss(predictions=y_pred, target=g1.inputs["t"]) + + assert len(g1.inputs) == 2 + assert g1.input_ports["i"].compare(tn.input_ports["x"]) == NeuralTypeComparisonResult.SAME + assert g1.input_ports["t"].compare(loss.input_ports["target"]) == NeuralTypeComparisonResult.SAME + + # Finally, make sure that the user cannot "bind" "input_ports"! + with pytest.raises(TypeError): + g1.input_ports["my_prediction"] = y_pred diff --git a/tests/unit/core/neural_graph/test_neural_graph_import_export.py b/tests/unit/core/neural_graph/test_neural_graph_import_export.py new file mode 100644 index 000000000000..b41370b99e72 --- /dev/null +++ b/tests/unit/core/neural_graph/test_neural_graph_import_export.py @@ -0,0 +1,65 @@ +# ! /usr/bin/python +# -*- coding: utf-8 -*- + +# ============================================================================= +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +import pytest + +from nemo.backends.pytorch.tutorials import MSELoss, RealFunctionDataLayer, TaylorNet +from nemo.core import NeuralGraph, OperationMode + + +@pytest.mark.usefixtures("neural_factory") +class TestNeuralGraphImportExport: + """ + Class testing Neural Graph configuration import/export. + """ + + @pytest.mark.unit + def test_graph_simple_import_export(self, tmpdir): + """ + Tests whether the Neural Module can instantiate a simple module by loading a configuration file. + + Args: + tmpdir: Fixture which will provide a temporary directory. + """ + # Instantiate the necessary neural modules. 
+        dl = RealFunctionDataLayer(n=100, batch_size=1, name="tgio1_dl")
+        tn = TaylorNet(dim=4, name="tgio1_tn")
+        loss = MSELoss(name="tgio1_loss")
+
+        # Create the graph.
+        with NeuralGraph(operation_mode=OperationMode.training) as g1:
+            x, t = dl()
+            p = tn(x=x)
+            _ = loss(predictions=p, target=t)
+
+        # Serialize graph
+        serialized_g1 = g1.serialize()
+
+        # Generate filename in the temporary directory.
+        tmp_file_name = str(tmpdir.mkdir("export").join("simple_graph.yml"))
+
+        # Export graph to file.
+        g1.export_to_config(tmp_file_name)
+
+        # Create the second graph - import!
+        g2 = NeuralGraph.import_from_config(tmp_file_name, reuse_existing_modules=True)
+        serialized_g2 = g2.serialize()
+
+        # Must be the same.
+        assert serialized_g1 == serialized_g2
diff --git a/tests/unit/core/neural_graph/test_neural_graph_nesting.py b/tests/unit/core/neural_graph/test_neural_graph_nesting.py
new file mode 100644
index 000000000000..09340d5aaee2
--- /dev/null
+++ b/tests/unit/core/neural_graph/test_neural_graph_nesting.py
@@ -0,0 +1,503 @@
+# ! /usr/bin/python
+# -*- coding: utf-8 -*-
+
+# =============================================================================
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+
+import pytest
+import torch
+
+from nemo.backends.pytorch.actions import PtActions
+from nemo.backends.pytorch.tutorials import MSELoss, RealFunctionDataLayer, TaylorNet
+from nemo.core import EvaluatorCallback, NeuralGraph, OperationMode, SimpleLossLoggerCallback
+from nemo.core.neural_types import NeuralTypeComparisonResult
+from nemo.utils import logging
+
+
+@pytest.mark.usefixtures("neural_factory")
+class TestNeuralGraphNesting:
+    @pytest.mark.unit
+    def test_module_nesting1_change_operation_modes(self):
+        """
+        Tests whether the operation mode of a graph is properly propagated to the modules used in it.
+        """
+        # Instantiate the necessary neural modules.
+        dl = RealFunctionDataLayer(n=10, batch_size=1)
+
+        with NeuralGraph(operation_mode=OperationMode.both):
+            _, _ = dl()
+        assert dl.operation_mode == OperationMode.both
+
+        with NeuralGraph(operation_mode=OperationMode.training):
+            _, _ = dl()
+        assert dl.operation_mode == OperationMode.training
+
+        with NeuralGraph(operation_mode=OperationMode.evaluation):
+            _, _ = dl()
+        assert dl.operation_mode == OperationMode.evaluation
+
+    @pytest.mark.unit
+    def test_graph_nesting2_possible_operation_modes(self):
+        """
+        Tests whether invalid nesting (i.e. nesting of graphs with incompatible modes) throws exceptions.
+        """
+        # Instantiate the necessary neural modules.
+        dl = RealFunctionDataLayer(n=10, batch_size=1)
+
+        with NeuralGraph(operation_mode=OperationMode.both) as both:
+            _, _ = dl()
+
+        with NeuralGraph(operation_mode=OperationMode.training) as training:
+            _, _ = dl()
+
+        with NeuralGraph(operation_mode=OperationMode.evaluation) as inference:
+            _, _ = dl()
+
+        # Allowed operations.
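+        # In short: a graph in the 'both' mode can be nested into a graph running in any mode, while 'training'
+        # and 'evaluation' graphs can only be nested into graphs operating in the same mode.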
+ # Can nest 'both' into 'training'. + with NeuralGraph(operation_mode=OperationMode.training): + _, _ = both() + + # Can nest 'both' into 'inference'. + with NeuralGraph(operation_mode=OperationMode.evaluation): + _, _ = both() + + # Can nest 'training' into 'training'. + with NeuralGraph(operation_mode=OperationMode.training): + _, _ = training() + + # Can nest 'inference' into 'inference'. + with NeuralGraph(operation_mode=OperationMode.evaluation): + _, _ = inference() + + # Can nest 'both' into 'both'. + with NeuralGraph(operation_mode=OperationMode.both): + _, _ = both() + + # Operations not allowed. + # Cannot nest 'inference' into 'training'. + with pytest.raises(TypeError): + with NeuralGraph(operation_mode=OperationMode.training): + _, _ = inference() + + # Cannot nest 'training' into 'inference'. + with pytest.raises(TypeError): + with NeuralGraph(operation_mode=OperationMode.evaluation): + _, _ = training() + + # Cannot nest 'training' into 'both'. + with pytest.raises(TypeError): + with NeuralGraph(operation_mode=OperationMode.both): + _, _ = training() + + # Cannot nest 'inference' into 'both'. + with pytest.raises(TypeError): + with NeuralGraph(operation_mode=OperationMode.both): + _, _ = inference() + + @pytest.mark.unit + def test_graph_nesting3_topology_copy_one_module_default_outputs(self): + """ + Test whether when nesting of one graph into another will result in copy of the graph topology (tensors). + Case: binding of outputs, default port names. + """ + dl = RealFunctionDataLayer(n=10, batch_size=1, name="tgn3_dl") + + # Create the "inner graph". + with NeuralGraph(operation_mode=OperationMode.training, name="tgn3_g1") as g1: + xg1, tg1 = dl() + + # Create the "outer graph". + with NeuralGraph(operation_mode=OperationMode.training, name="tgn3_g2") as g2: + xg2, tg2 = g1() + + # We expect that both graphs will have the same steps. + assert len(g1.steps) == len(g2.steps) + assert g1.steps[0] == g2.steps[0] + + # Make sure that the modules are the same. + assert len(g1) == len(g2) + assert g1["tgn3_dl"] is dl + assert g2["tgn3_dl"] is dl + assert g1["tgn3_dl"] is g2["tgn3_dl"] + + # Make sure that outputs are ok. + assert len(g1.outputs) == len(g2.outputs) + for port in ["x", "y"]: + # Definitions are the same: test two "paths" of accessing the type. + assert g1.outputs[port].ntype.compare(g1.output_ports[port]) == NeuralTypeComparisonResult.SAME + + assert g1.output_ports[port].compare(g2.output_ports[port]) == NeuralTypeComparisonResult.SAME + assert g1.outputs[port].ntype.compare(g2.outputs[port].ntype) == NeuralTypeComparisonResult.SAME + # At the same time - those have to be two different port objects! + assert g1.outputs[port] is not g2.outputs[port] + # And different tensors (as those are "internally produced tensors"!) + assert g1.output_tensors[port] is not g2.output_tensors[port] + + @pytest.mark.unit + def test_graph_nesting4_topology_copy_one_module_manual_outputs(self): + """ + Test whether when nesting of one graph into another will result in copy of the graph topology (tensors). + Case: binding of outputs, manual port names. + """ + + dl = RealFunctionDataLayer(n=10, batch_size=1, name="tgn4_dl") + + # Create the "inner graph". + with NeuralGraph(operation_mode=OperationMode.training, name="tgn4_g1") as g1: + xg1, tg1 = dl() + # Set port binding manually, with different names - and their number! + g1.outputs["inner_x"] = xg1 + + # Create the "outer graph". 
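+        # Since only "inner_x" was bound above, nesting g1 below exposes a single output, so g1() returns one tensor.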
+ with NeuralGraph(operation_mode=OperationMode.training, name="tgn4_g2") as g2: + xg2 = g1() + # Set port binding manually, with different names - and their number! + g2.outputs["outer_x"] = xg2 + + # We expect that both graphs will have the same steps. + assert len(g1.steps) == len(g2.steps) + assert g1.steps[0] == g2.steps[0] + + # Make sure that the modules are the same. + assert len(g1) == len(g2) + assert g1["tgn4_dl"] is g2["tgn4_dl"] + + # Make sure that outputs are ok. + assert len(g1.outputs) == len(g2.outputs) + for inter_port, outer_port in [("inner_x", "outer_x")]: + # Definitions are the same: test two "paths" of accessing the type. + assert g1.output_ports[inter_port].compare(g2.output_ports[outer_port]) == NeuralTypeComparisonResult.SAME + assert ( + g1.outputs[inter_port].ntype.compare(g2.outputs[outer_port].ntype) == NeuralTypeComparisonResult.SAME + ) + # At the same time - those have to be two different port objects! + assert g1.outputs[inter_port] is not g2.outputs[outer_port] + # And different tensors (as those are "internally produced tensors"!) + assert g1.output_tensors[inter_port] is not g2.output_tensors[outer_port] + + @pytest.mark.unit + def test_graph_nesting4_1_topology_copy_one_module_manual_outputs_bound_only_in_inner(self): + """ + Test whether when nesting of one graph into another will result in copy of the graph topology (tensors). + Case: binding of outputs, manual port names - only in the inner graph. + Testing whether outputs of outer graph have the manually bound names. + """ + + dl = RealFunctionDataLayer(n=10, batch_size=1, name="tgn41_dl") + + # Create the "inner graph". + with NeuralGraph(operation_mode=OperationMode.training, name="tgn41_g1") as g1: + xg1, tg1 = dl() + # Set port binding manually, with different names - and their number! + g1.outputs["inner_x"] = xg1 + g1.outputs["inner_t"] = tg1 + + # Create the "outer graph". + with NeuralGraph(operation_mode=OperationMode.training, name="tgn41_g2") as g2: + # Get them as a tuple. + outputs = g1() + + # Retrieve tensors from tuple. + assert outputs._fields[0] == "inner_x" + assert outputs._fields[1] == "inner_t" + xg2 = outputs.inner_x + tg2 = outputs.inner_t + + # Make sure that outer graph has objects of the same names + assert len(g1.outputs) == len(g2.outputs) + for inter_port, outer_port in [("inner_x", "inner_x"), ("inner_t", "inner_t")]: + # Definitions are the same: test two "paths" of accessing the type. + assert g1.output_ports[inter_port].compare(g2.output_ports[outer_port]) == NeuralTypeComparisonResult.SAME + assert ( + g1.outputs[inter_port].ntype.compare(g2.outputs[outer_port].ntype) == NeuralTypeComparisonResult.SAME + ) + # At the same time - those have to be two different port objects! + assert g1.outputs[inter_port] is not g2.outputs[outer_port] + # And different tensors (as those are "internally produced tensors"!) + assert g1.output_tensors[inter_port] is not g2.output_tensors[outer_port] + + @pytest.mark.unit + def test_graph_nesting5_topology_copy_one_module_default_inputs(self): + """ + Test whether when nesting of one graph into another will result in copy of the graph topology (tensors). + Case: binding of inputs, default port names. + """ + tn = TaylorNet(dim=4, name="tgn5_tn") + + # Create the "inner graph". + with NeuralGraph(operation_mode=OperationMode.training) as g1: + y_pred1 = tn(x=g1) + + # Create the "outer graph". 
+ with NeuralGraph(operation_mode=OperationMode.training) as g2: + y_pred2 = g1(x=g2) + + # We expect that both graphs will have the same steps. + assert len(g1.steps) == len(g2.steps) + assert g1.steps[0] == g2.steps[0] + + # Make sure that the modules are the same. + assert len(g1) == len(g2) + assert g1["tgn5_tn"] is g2["tgn5_tn"] + + # Make sure that inputs are ok. + assert len(g1.inputs) == len(g2.inputs) + assert g1.input_ports["x"].compare(tn.input_ports["x"]) == NeuralTypeComparisonResult.SAME + assert g2.input_ports["x"].compare(tn.input_ports["x"]) == NeuralTypeComparisonResult.SAME + # At the same time - those point to the same step-module-port. + assert g1.inputs.has_binding(0, "x") + assert g2.inputs.has_binding(0, "x") + assert g1.inputs["x"].consumers[0].step_number == 0 + assert g1.inputs["x"].consumers[0].module_name == tn.name + assert g1.inputs["x"].consumers[0].port_name == "x" + assert g2.inputs["x"].consumers[0].step_number == 0 + assert g2.inputs["x"].consumers[0].module_name == tn.name + assert g2.inputs["x"].consumers[0].port_name == "x" + + # Make sure that outputs are ok. + assert len(g1.outputs) == len(g2.outputs) + assert g1.output_ports["y_pred"].compare(tn.output_ports["y_pred"]) == NeuralTypeComparisonResult.SAME + assert g1.output_ports["y_pred"].compare(tn.output_ports["y_pred"]) == NeuralTypeComparisonResult.SAME + # At the same time - those have to be two different port objects! + assert g1.outputs["y_pred"] is not g2.outputs["y_pred"] + # And different tensors (as those are "internally produced tensors"!) + assert g1.output_tensors["y_pred"] is y_pred1 + assert g2.output_tensors["y_pred"] is y_pred2 + assert y_pred1 is not y_pred2 + + @pytest.mark.unit + def test_graph_nesting6_topology_copy_one_module_manual_inputs(self): + """ + Test whether when nesting of one graph into another will result in copy of the graph topology (tensors). + Case: binding of inputs, manual port names. + """ + tn = TaylorNet(dim=4, name="tgn6_tn") + + # Create the "inner graph". + with NeuralGraph(operation_mode=OperationMode.training, name="tgn6_g1") as g1: + # Copy input type. + g1.inputs["inner_x"] = tn.input_ports["x"] + # Bind the input port. + y_pred1 = tn(x=g1.inputs["inner_x"]) + + # Create the "outer graph". + with NeuralGraph(operation_mode=OperationMode.training, name="tgn6_g2") as g2: + # Copy input type. + g2.inputs["outer_x"] = g1.input_ports["inner_x"] + # Bind the input port. + y_pred2 = g1(inner_x=g2.inputs["outer_x"]) + + # We expect that both graphs will have the same steps. + assert len(g1.steps) == len(g2.steps) + assert g1.steps[0] == g2.steps[0] + + # Make sure that the modules are the same. + assert len(g1) == len(g2) + assert g1["tgn6_tn"] is g2["tgn6_tn"] + + # Make sure that inputs are ok. + assert len(g1.inputs) == len(g2.inputs) + assert g1.input_ports["inner_x"].compare(tn.input_ports["x"]) == NeuralTypeComparisonResult.SAME + assert g2.input_ports["outer_x"].compare(tn.input_ports["x"]) == NeuralTypeComparisonResult.SAME + # At the same time - those point to the same module-port. 
+ assert g1.inputs.has_binding(0, "x") + assert g2.inputs.has_binding(0, "x") + assert g1.inputs["inner_x"].consumers[0].step_number == 0 + assert g1.inputs["inner_x"].consumers[0].module_name == tn.name + assert g1.inputs["inner_x"].consumers[0].port_name == "x" + assert g2.inputs["outer_x"].consumers[0].step_number == 0 + assert g2.inputs["outer_x"].consumers[0].module_name == tn.name + assert g2.inputs["outer_x"].consumers[0].port_name == "x" + + # Make sure that outputs are ok. + assert len(g1.outputs) == len(g2.outputs) + assert g1.output_ports["y_pred"].compare(tn.output_ports["y_pred"]) == NeuralTypeComparisonResult.SAME + assert g1.output_ports["y_pred"].compare(tn.output_ports["y_pred"]) == NeuralTypeComparisonResult.SAME + # At the same time - those have to be two different port objects! + assert g1.outputs["y_pred"] is not g2.outputs["y_pred"] + # And different tensors (as those are "internally produced tensors"!) + assert g1.output_tensors["y_pred"] is y_pred1 + assert g2.output_tensors["y_pred"] is y_pred2 + assert y_pred1 is not y_pred2 + + @pytest.mark.unit + def test_graph_nesting7_topology_copy_one_module_all_manual_connect(self): + """ + Test whether when nesting of one graph into another will result in copy of the graph topology (tensors). + Case: manual binding of inputs and outputs, connects to other modules. + """ + ds = RealFunctionDataLayer(n=10, batch_size=1, name="tgn7_ds") + tn = TaylorNet(dim=4, name="tgn7_tn") + loss = MSELoss(name="tgn7_loss") + + # Create the "inner graph". + with NeuralGraph(operation_mode=OperationMode.training, name="tgn7_g1") as g1: + # Copy the input type. + g1.inputs["inner_x"] = tn.input_ports["x"] + # Manually bind the input port. + y_pred1 = tn(x=g1.inputs["inner_x"]) + # Manually bind the output port. + g1.outputs["inner_y_pred"] = y_pred1 + + # Create the "outer graph". + with NeuralGraph(operation_mode=OperationMode.training, name="tgn7_g2") as g2: + x, y = ds() + y_pred2 = g1(inner_x=x) + lss = loss(predictions=y_pred2, target=y) + + # Check steps. + assert len(g2.steps) == 3 + assert g2.steps[1] == g1.steps[0] + + # Make sure that the modules are the same. + assert len(g2) == 3 + assert g2["tgn7_tn"] is g1["tgn7_tn"] + + # Make sure that inputs are ok. + assert len(g2.inputs) == 0 + + # Check outputs. + assert len(g2.outputs) == 4 + assert g2.output_ports["x"].compare(ds.output_ports["x"]) == NeuralTypeComparisonResult.SAME + assert g2.output_ports["y"].compare(ds.output_ports["y"]) == NeuralTypeComparisonResult.SAME + assert g2.output_ports["loss"].compare(loss.output_ports["loss"]) == NeuralTypeComparisonResult.SAME + # The manually bound name! + assert g2.output_ports["inner_y_pred"].compare(tn.output_ports["y_pred"]) == NeuralTypeComparisonResult.SAME + + # Check the output tensors. + assert len(g2.output_tensors) == 4 + assert g2.output_tensors["x"] == x + assert g2.output_tensors["y"] == y + assert g2.output_tensors["loss"] == lss + # The manually bound name! + assert g2.output_tensors["inner_y_pred"] == y_pred2 + + # Check the "internal tensors". + assert y_pred2 is not y_pred1 + assert g2.tensors[0]["x"] == x + assert g2.tensors[0]["y"] == y + assert g2.tensors[2]["loss"] == lss + # Internally the name "y_pred" is used, not the "bound output name": "inner_y_pred"! + assert g2.tensors[1]["y_pred"] == y_pred2 + + # Update g2: manually bound only one output. + with g2: + g2.outputs["outer_loss"] = lss + + # Make sure that outputs are ok. 
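+        # Manually binding an output resets the default bindings, so "outer_loss" is now the only bound output.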
+ assert len(g2.outputs) == 1 + assert g2.output_ports["outer_loss"].compare(loss.output_ports["loss"]) == NeuralTypeComparisonResult.SAME + assert g2.output_tensors["outer_loss"] is lss + + @pytest.mark.unit + def test_graph_nesting8_topology_copy_two_modules(self): + """ + Test whether when nesting of one graph into another will result in copy of the graph topology (tensors). + Case: manual binding of inputs and outputs in the inner graph. + """ + ds = RealFunctionDataLayer(n=10, batch_size=1, name="tgn8_ds") + tn = TaylorNet(dim=4, name="tgn8_tn") + loss = MSELoss(name="tgn8_loss") + + # Create the "inner graph". + with NeuralGraph(operation_mode=OperationMode.training, name="tgn8_g1") as g1: + # Create input port definitions. + g1.inputs["inner_x"] = tn.input_ports["x"] + g1.inputs["inner_target"] = loss.input_ports["target"] + + # Connect modules and bound inputs. + y_pred1 = tn(x=g1.inputs["inner_x"]) + lss1 = loss(predictions=y_pred1, target=g1.inputs["inner_target"]) + + # Manually bind the output ports. + g1.outputs["inner_y_pred"] = y_pred1 + g1.outputs["inner_loss"] = lss1 + + # Create the "outer graph". + with NeuralGraph(operation_mode=OperationMode.training, name="tgn8_g2") as g2: + x, y = ds() + # Nest the inner graph. + y_pred2, lss2 = g1(inner_x=x, inner_target=y) + # Manually bind the output ports. + g2.outputs["outer_y_pred"] = y_pred2 + g2.outputs["outer_loss"] = lss2 + + # Check modules and steps. + assert len(g2.steps) == 3 + assert len(g2) == 3 + + # Check the output tensors. + assert len(g2.output_tensors) == 2 + assert g2.output_tensors["outer_y_pred"] == y_pred2 + assert g2.output_tensors["outer_loss"] == lss2 + + # Check the "internal tensors". + assert y_pred2 is not y_pred1 + assert lss2 is not lss1 + assert g2.tensors[0]["x"] == x + assert g2.tensors[0]["y"] == y + # Internally the name "y_pred" is used, not the "bound output name": "inner_y_pred"! + assert g2.tensors[1]["y_pred"] == y_pred2 + # Analogically with "loss". + assert g2.tensors[2]["loss"] == lss2 + + @pytest.mark.unit + def test_graph_nesting9_topology_copy_whole_graph(self): + """ + Test whether when nesting of one graph into another will result in copy of the graph topology (tensors). + Case: manual binding of inputs and outputs in the inner graph. Manual binding of outer graph outputs. + """ + ds = RealFunctionDataLayer(n=10, batch_size=1, name="tgn9_ds") + tn = TaylorNet(dim=4, name="tgn9_tn") + loss = MSELoss(name="tgn9_loss") + + # Create the "inner graph". + with NeuralGraph(operation_mode=OperationMode.training, name="tgn9_g1") as g1: + # Connect modules. + x, y = ds() + y_pred1 = tn(x=x) + lss1 = loss(predictions=y_pred1, target=y) + + # Manually bind the output ports. + g1.outputs["inner_y_pred"] = y_pred1 + g1.outputs["inner_loss"] = lss1 + + # Create the "outer graph". + with NeuralGraph(operation_mode=OperationMode.training, name="tgn9_g2") as g2: + y_pred2, lss2 = g1() + # Manually bind the output ports. + g2.outputs["outer_y_pred"] = y_pred2 + g2.outputs["outer_loss"] = lss2 + + # Check modules and steps. + assert len(g2.steps) == 3 + assert len(g2) == 3 + + # Check the output tensors. + assert len(g2.output_tensors) == 2 + assert g2.output_tensors["outer_y_pred"] == y_pred2 + assert g2.output_tensors["outer_loss"] == lss2 + + # Check the "internal tensors". 
+ assert y_pred2 is not y_pred1 + assert lss2 is not lss1 + assert g2.tensors[0]["x"].ntype.compare(ds.output_ports["x"]) == NeuralTypeComparisonResult.SAME + assert g2.tensors[0]["y"].ntype.compare(ds.output_ports["y"]) == NeuralTypeComparisonResult.SAME + # Internally the name "y_pred" is used, not the "bound output name": "inner_y_pred"! + assert g2.tensors[1]["y_pred"].ntype.compare(tn.output_ports["y_pred"]) == NeuralTypeComparisonResult.SAME + # Analogically with "loss". + assert g2.tensors[2]["loss"].ntype.compare(loss.output_ports["loss"]) == NeuralTypeComparisonResult.SAME diff --git a/tests/unit/core/neural_graph/test_neural_graph_serialization.py b/tests/unit/core/neural_graph/test_neural_graph_serialization.py new file mode 100644 index 000000000000..8956c2b5052b --- /dev/null +++ b/tests/unit/core/neural_graph/test_neural_graph_serialization.py @@ -0,0 +1,307 @@ +# ! /usr/bin/python +# -*- coding: utf-8 -*- + +# ============================================================================= +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +import pytest + +from nemo.backends.pytorch.tutorials import MSELoss, RealFunctionDataLayer, TaylorNet +from nemo.core import NeuralGraph, OperationMode + + +@pytest.mark.usefixtures("neural_factory") +class TestNeuralGraphSerialization: + @pytest.mark.unit + def test_graph_serialization_1_simple_graph_no_binding(self): + """ + Tests whether serialization of a simple graph works. + """ + # Instantiate the necessary neural modules. + dl = RealFunctionDataLayer(n=100, batch_size=1, name="tgs1_dl") + tn = TaylorNet(dim=4, name="tgs1_tn") + loss = MSELoss(name="tgs1_loss") + + # Create the graph. + with NeuralGraph(operation_mode=OperationMode.training, name="g1") as g1: + x, t = dl() + prediction1 = tn(x=x) + _ = loss(predictions=prediction1, target=t) + + # Serialize the graph. + serialized_g1 = g1.serialize() + + # Create a second graph - deserialize with reusing. + g2 = NeuralGraph.deserialize(serialized_g1, reuse_existing_modules=True, name="g2") + serialized_g2 = g2.serialize() + + # Must be the same. + assert serialized_g1 == serialized_g2 + + # Delete modules. + del dl + del tn + del loss + # Delete graphs as they contain "hard" references to those modules. + del g1 + del g2 + + # Create a third graph - deserialize without reusing, should create new modules. + g3 = NeuralGraph.deserialize(serialized_g1, reuse_existing_modules=False, name="g3") + serialized_g3 = g3.serialize() + + # Must be the same. + assert serialized_g1 == serialized_g3 + + # Deserialize graph - without reusing modules not allowed. + with pytest.raises(KeyError): + _ = NeuralGraph.deserialize(serialized_g1, reuse_existing_modules=False) + + @pytest.mark.unit + def test_graph_serialization_2_simple_graph_output_binding(self): + """ + Tests whether serialization of a simple graph with output binding works. 
+ """ + # Instantiate the necessary neural modules. + dl = RealFunctionDataLayer(n=100, batch_size=1, name="tgs2_dl") + tn = TaylorNet(dim=4, name="tgs2_tn") + loss = MSELoss(name="tgs2_loss") + + # Create the graph. + with NeuralGraph(operation_mode=OperationMode.evaluation) as g1: + x, t = dl() + prediction1 = tn(x=x) + _ = loss(predictions=prediction1, target=t) + # Manually bind the selected outputs. + g1.outputs["ix"] = x + g1.outputs["te"] = t + g1.outputs["prediction"] = prediction1 + + # Serialize graph + serialized_g1 = g1.serialize() + + # Create the second graph - deserialize with reusing. + g2 = NeuralGraph.deserialize(serialized_g1, reuse_existing_modules=True) + serialized_g2 = g2.serialize() + + # Must be the same. + assert serialized_g1 == serialized_g2 + + @pytest.mark.unit + def test_graph_serialization_3_simple_model_input_output_binding(self): + """ + Tests whether serialization of a simple graph with input and output binding works. + """ + # Instantiate the necessary neural modules. + tn = TaylorNet(dim=4, name="tgs3_tn") + + # Create "model". + with NeuralGraph(operation_mode=OperationMode.both, name="model") as model: + # Manually bind input port: "input" -> "x" + model.inputs["input"] = tn.input_ports["x"] + # Add module to graph and bind it input port 'x'. + y = tn(x=model.inputs["input"]) + # Manual output bind. + model.outputs["output"] = y + + # Serialize the "model". + serialized_model1 = model.serialize() + + # Create the second graph - deserialize with reusing. + model2 = NeuralGraph.deserialize(serialized_model1, reuse_existing_modules=True) + serialized_model2 = model2.serialize() + + # Must be the same. + assert serialized_model1 == serialized_model2 + + @pytest.mark.unit + def test_graph_serialization_4_graph_after_nesting_with_default_binding_reuse_modules(self): + """ + Tests whether serialization works in the case when we serialize a graph after a different graph + was nested in it, with additionally bound input and output binding works (default port names). + """ + # Instantiate the necessary neural modules. + dl = RealFunctionDataLayer(n=100, batch_size=1, name="tgs4_dl") + tn = TaylorNet(dim=4, name="tgs4_tn") + loss = MSELoss(name="tgs4_loss") + + # Create "model". + with NeuralGraph(operation_mode=OperationMode.both, name="model") as model: + # Add module to graph and bind it input port 'x'. + y = tn(x=model) + # NOTE: For some reason after this call both the "tgs4_tn" and "model" objects + # remains on the module/graph registries. + # (So somewhere down there remains a strong reference to module or graph). + # This happens ONLY when passing graph as argument! + # (Check out the next test which actually removes module and graph!). + # Still, that is not an issue, as we do not expect the users + # to delete and recreate modules in their "normal" applications. + + # Build the "training graph" - using the model copy. + with NeuralGraph(operation_mode=OperationMode.training, name="tgs4_training") as training: + # Add modules to graph. + x, t = dl() + # Incorporate modules from the existing "model" graph. + p = model(x=x) + lss = loss(predictions=p, target=t) + + # Serialize the "training graph". + serialized_training = training.serialize() + + # Create the second graph - deserialize withoput "module reusing". + training2 = NeuralGraph.deserialize(serialized_training, reuse_existing_modules=True) + serialized_training2 = training2.serialize() + + # Must be the same. 
+        assert serialized_training == serialized_training2
+
+    @pytest.mark.unit
+    def test_graph_serialization_5_graph_after_nesting_without_reusing(self):
+        """
+        Tests whether serialization works when we serialize a graph after a different graph was nested in it,
+        with additionally bound inputs and outputs (default port names).
+        """
+        # Instantiate the necessary neural modules.
+        dl = RealFunctionDataLayer(n=100, batch_size=1, name="tgs5_dl")
+        tn = TaylorNet(dim=4, name="tgs511_tn")
+        loss = MSELoss(name="tgs5_loss")
+
+        # Create "model".
+        with NeuralGraph(operation_mode=OperationMode.both, name="tgs5_model") as model:
+            # Manually bind input port: "input" -> "x"
+            model.inputs["input"] = tn.input_ports["x"]
+            # Add module to graph and bind it input port 'x'.
+            y = tn(x=model.inputs["input"])
+            # Use the default output name.
+
+        # Build the "training graph" - using the model copy.
+        with NeuralGraph(operation_mode=OperationMode.training, name="tgs5_training") as training:
+            # Add modules to graph.
+            x, t = dl()
+            # Incorporate modules from the existing "model" graph.
+            p = model(input=x)
+            lss = loss(predictions=p, target=t)
+
+        # Serialize the "training graph".
+        serialized_training = training.serialize()
+
+        # Delete everything.
+        del dl
+        del tn
+        del loss
+        del model
+        del training
+
+        # Create the second graph - deserialize without "module reusing".
+        training2 = NeuralGraph.deserialize(serialized_training)
+        serialized_training2 = training2.serialize()
+
+        # Must be the same.
+        assert serialized_training == serialized_training2
+
+    @pytest.mark.unit
+    def test_graph_serialization_6_graph_after_nesting_with_manual_binding(self):
+        """
+        Tests whether serialization works when we serialize a graph after a different graph was nested in it,
+        with additionally bound inputs and outputs (manual port names).
+        """
+        # Instantiate the necessary neural modules.
+        dl = RealFunctionDataLayer(n=100, batch_size=1, name="tgs6_dl")
+        tn = TaylorNet(dim=4, name="tgs6_tn")
+        loss = MSELoss(name="tgs6_loss")
+
+        # Create "model".
+        with NeuralGraph(operation_mode=OperationMode.both, name="tgs6_model") as model:
+            # Manually bind input port: "input" -> "x"
+            model.inputs["input"] = tn.input_ports["x"]
+            # Add module to graph and bind it input port 'x'.
+            y = tn(x=model.inputs["input"])
+            # Manual output bind.
+            model.outputs["output"] = y
+
+        # Serialize "model".
+        serialized_model = model.serialize()
+
+        # Delete model-related stuff.
+        del model
+        del tn
+
+        # Deserialize the "model copy".
+        model_copy = NeuralGraph.deserialize(serialized_model, name="tgs6_model_copy")
+
+        # Build the "training graph" - using the model copy.
+        with NeuralGraph(operation_mode=OperationMode.training, name="tgs6_training") as training:
+            # Add modules to graph.
+            x, t = dl()
+            # Incorporate modules from the existing "model" graph.
+            p = model_copy(input=x)  # Note: this output should actually be named "output", not "y_pred"!
+            lss = loss(predictions=p, target=t)
+
+        # Serialize the "training graph".
+        serialized_training = training.serialize()
+
+        # Delete everything.
+        del dl
+        del loss
+        del model_copy
+        del training
+
+        # Create the second graph - deserialize without "module reusing".
+        training2 = NeuralGraph.deserialize(serialized_training)
+        serialized_training2 = training2.serialize()
+
+        # Must be the same.
+ assert serialized_training == serialized_training2 + + @pytest.mark.unit + def test_graph_serialization_7_arbitrary_graph_with_loops(self): + """ + Tests whether serialization works in the case when we serialize a graph after a different graph + was nested in it, with additionally bound input and output binding works (manual port names). + """ + # Instantiate the necessary neural modules. + dl = RealFunctionDataLayer(n=100, batch_size=1, name="dl") + tn = TaylorNet(dim=4, name="tn") + loss = MSELoss(name="loss") + + # Build a graph with a loop. + with NeuralGraph(name="graph") as graph: + # Add modules to graph. + x, t = dl() + # First call to TN. + p1 = tn(x=x) + # Second call to TN. + p2 = tn(x=p1) + # Take output of second, pass it to loss. + lss = loss(predictions=p2, target=t) + + # Make sure all connections are there! + assert len(graph.tensor_list) == 5 + # 4 would mean that we have overwritten the "p1" (tn->y_pred) tensor! + + # Serialize the graph. + serialized_graph = graph.serialize() + + # Create the second graph - deserialize with "module reusing". + graph2 = NeuralGraph.deserialize(serialized_graph, reuse_existing_modules=True) + serialized_graph2 = graph2.serialize() + + # Must be the same. + assert serialized_graph == serialized_graph2 + + # import pdb;pdb.set_trace() + # print("1: \n",serialized_graph) + # print("2: \n",serialized_graph2) diff --git a/tests/unit/core/neural_graph/test_neural_graphs.py b/tests/unit/core/neural_graph/test_neural_graphs.py new file mode 100644 index 000000000000..96fac0f620c9 --- /dev/null +++ b/tests/unit/core/neural_graph/test_neural_graphs.py @@ -0,0 +1,119 @@ +# ! /usr/bin/python +# -*- coding: utf-8 -*- + +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + + +import pytest +from numpy import array_equal + +from nemo.backends import get_state_dict +from nemo.backends.pytorch.tutorials import MSELoss, RealFunctionDataLayer, TaylorNet +from nemo.core import NeuralGraph +from nemo.core.neural_types import NeuralTypeComparisonResult + + +@pytest.mark.usefixtures("neural_factory") +class TestNeuralGraphs: + @pytest.mark.unit + def test_explicit_graph_with_activation(self): + """ + Tests initialization of an `explicit` graph and decoupling of graph creation from its activation. + Also tests modules access. + """ + # Create modules. + dl = RealFunctionDataLayer(n=10, batch_size=1, name="dl") + fx = TaylorNet(dim=4, name="fx") + loss = MSELoss(name="loss") + + # Create the g0 graph. + g0 = NeuralGraph() + + # Activate the "g0 graph context" - all operations will be recorded to g0. + with g0: + x, t = dl() + p = fx(x=x) + lss = loss(predictions=p, target=t) + + # Assert that there are 3 modules in the graph. + assert len(g0) == 3 + + # Test access modules. 
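+        # Modules can be retrieved from the graph by name, dictionary-style.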
+ assert g0["dl"] is dl + assert g0["fx"] is fx + assert g0["loss"] is loss + + with pytest.raises(KeyError): + g0["other_module"] + + @pytest.mark.unit + def test_explicit_graph_manual_activation(self): + """ Tests initialization of an `explicit` graph using `manual` activation. """ + # Create modules. + dl = RealFunctionDataLayer(n=10, batch_size=1) + fx = TaylorNet(dim=4) + + # Create the g0 graph. + g0 = NeuralGraph() + + # Activate the "g0 graph context" "manually" - all steps will be recorded to g0. + g0.activate() + + # Define g0 - connections between the modules. + x, t = dl() + p = fx(x=x) + + # Deactivate the "g0 graph context". + # Note that this is really optional, as long as there are no other steps to be recorded. + g0.deactivate() + + # Assert that there are 2 modules in the graph. + assert len(g0) == 2 + + @pytest.mark.unit + def test_graph_save_load(self, tmpdir): + """ + Tests graph saving and loading. + + Args: + tmpdir: Fixture which will provide a temporary directory. + """ + + dl = RealFunctionDataLayer(n=10, batch_size=1) + tn = TaylorNet(dim=4) + # Get the "original" weights. + weights1 = get_state_dict(tn) + + # Create a simple graph. + with NeuralGraph() as g1: + x, t = dl() + p = tn(x=x) + + # Generate filename in the temporary directory. + tmp_file_name = str(tmpdir.join("tgsl_g1.chkpt")) + # Save graph. + g1.save_to(tmp_file_name) + + # Load graph. + g1.restore_from(tmp_file_name) + + # Get the "restored" weights. + weights2 = get_state_dict(tn) + + # Compare state dicts. + for key in weights1: + assert array_equal(weights1[key].cpu().numpy(), weights2[key].cpu().numpy()) diff --git a/tests/core/test_neural_modules_initialization.py b/tests/unit/core/neural_module/test_module_configuration.py similarity index 61% rename from tests/core/test_neural_modules_initialization.py rename to tests/unit/core/neural_module/test_module_configuration.py index e6d5c29a4827..f177c2a9280d 100644 --- a/tests/core/test_neural_modules_initialization.py +++ b/tests/unit/core/neural_module/test_module_configuration.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # ============================================================================= -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -18,41 +18,50 @@ # ============================================================================= -import nemo -from tests.common_setup import NeMoUnitTest +import pytest +from nemo.core import NeuralModule -class MockupModule(nemo.core.NeuralModule): - """ - Mockup component class. - """ - - def __init__(self): - nemo.core.NeuralModule.__init__(self) - -class NeuralModuleConfigTest(NeMoUnitTest): +@pytest.mark.usefixtures("neural_factory") +class TestNeuralModuleConfig: """ Class testing methods related to Neural Module import/export. """ - def setUp(self) -> None: - super().setUp() + class MockupModule(NeuralModule): + """ + Mockup component class. + """ + + def __init__(self): + NeuralModule.__init__(self) + + def validate_params(self, params): + """ Method for accessing private method of NeuralModuce class """ + return self._NeuralModule__validate_params(params) + def setup_method(self, method): + """ + Setup_method is invoked for every test method of a class. + Mocks up the class and creates module used in all tests. + """ # Mockup abstract methods. 
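The validate_params helper above has to go through the name-mangled attribute because double-underscore names are rewritten per defining class; this is plain Python behaviour rather than anything NeMo-specific. A standalone illustration:

    # Sketch: why the private method is reachable only as _NeuralModule__validate_params.
    class Base:
        def __validate(self):              # mangled to _Base__validate at class-definition time
            return True

    class Child(Base):
        def check(self):
            # Writing self.__validate() here would look up _Child__validate and fail;
            # the attribute that actually exists is _Base__validate.
            return self._Base__validate()

    assert Child().check() is True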
- MockupModule.__abstractmethods__ = set()
+ TestNeuralModuleConfig.MockupModule.__abstractmethods__ = set()
# Create object.
- self.module = MockupModule()
+ self.module = TestNeuralModuleConfig.MockupModule()
+ @pytest.mark.unit
def test_build_in_types(self):
""" Tests whether built-in types are handled."""
params = {"int": 123, "float": 12.4, "string": "ala ma kota", "bool": True}
# Check error output.
- self.assertEqual(self.module._validate_params(params), True)
+ assert self.module.validate_params(params) == True
+ @pytest.mark.unit
def test_nested_dict(self):
""" Tests whether (nested) dicts are handled."""
@@ -64,20 +73,22 @@ def test_nested_dict(self):
}
# Check error output.
- self.assertEqual(self.module._validate_params(params), True)
+ assert self.module.validate_params(params) == True
+ @pytest.mark.unit
def test_nested_list(self):
""" Tests whether (nested) lists are handled."""
params = {"list_outer": [[1, 2, 3, 4]]}
# Check error output.
- self.assertEqual(self.module._validate_params(params), True)
+ assert self.module.validate_params(params) == True
+ @pytest.mark.unit
def test_nested_mix(self):
""" Tests whether nested mixes of lists and dicts are handled."""
params = {"list_outer": [{"int": 123, "float": 12.4, "string": "ala ma kota", "bool": True}]}
# Check error output.
- self.assertEqual(self.module._validate_params(params), True)
+ assert self.module.validate_params(params) == True
diff --git a/tests/unit/core/neural_module/test_module_configuration_export.py b/tests/unit/core/neural_module/test_module_configuration_export.py
new file mode 100644
index 000000000000..689d621c9c5c
--- /dev/null
+++ b/tests/unit/core/neural_module/test_module_configuration_export.py
@@ -0,0 +1,178 @@
+# ! /usr/bin/python
+# -*- coding: utf-8 -*-
+
+# =============================================================================
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+
+import pytest
+from ruamel.yaml import YAML
+
+from nemo.core import NeuralModule
+
+YAML = YAML(typ='safe')
+
+
+@pytest.mark.usefixtures("neural_factory")
+class TestNeuralModuleExport:
+ """
+ Class testing Neural Module configuration export.
+ """
+
+ class MockupSimpleModule(NeuralModule):
+ """
+ Mockup component class.
+ """
+
+ def __init__(self, a, b, c, d=False):
+ super().__init__()
+
+ def setup_method(self, method):
+ """
+ Setup_method is invoked for every test method of a class.
+ Mocks up the module class.
+ """
+ # Mockup abstract methods.
+ TestNeuralModuleExport.MockupSimpleModule.__abstractmethods__ = set()
+
+ @pytest.mark.unit
+ def test_simple_export(self, tmpdir):
+ """
+ Tests whether built-in types are properly exported.
+
+ Args:
+ tmpdir: Fixture which will provide a temporary directory.
+ """ + + # Set params: {"int": 123, "float": 12.4, "string": "ala ma kota", "bool": True} + params = {"a": 123, "b": 12.4, "c": "ala ma kota"} + module = TestNeuralModuleExport.MockupSimpleModule(**params) + + # Generate filename in the temporary directory. + tmp_file_name = str(tmpdir.mkdir("export").join("simple_export.yml")) + # Export. + module.export_to_config(tmp_file_name) + + # Check the resulting config file. + with open(tmp_file_name, 'r') as stream: + exported_config = YAML.load(stream) + + # Assert that it contains main sections: header and init params. + assert "header" in exported_config + assert "init_params" in exported_config + + # Assert that the header contains class and spec. + assert "full_spec" in exported_config["header"] + + # Check init params. + exported_init_params = exported_config["init_params"] + assert int(exported_init_params["a"]) == 123 + assert float(exported_init_params["b"]) == 12.4 + assert exported_init_params["c"] == "ala ma kota" + assert bool(exported_init_params["d"]) == False + + @pytest.mark.unit + def test_nested_list_export(self, tmpdir): + """ + Tests whether (nested*) lists are properly exported. + + Args: + tmpdir: Fixture which will provide a temporary directory. + """ + + # Params: list, list of lists, list of lists of lists, None type! + module = TestNeuralModuleExport.MockupSimpleModule( + a=[123], b=[[12.4]], c=[[["ala", "ma", "kota"], "kot ma"], "ale"], d=None + ) + + # Generate filename in the temporary directory. + tmp_file_name = str(tmpdir.mkdir("export").join("nested_list_export.yml")) + # Export. + module.export_to_config(tmp_file_name) + + # Check the resulting config file. + with open(tmp_file_name, 'r') as stream: + exported_config = YAML.load(stream) + + # Assert that it contains main sections: header and init params. + assert "header" in exported_config + assert "init_params" in exported_config + + # Check init params. + exported_init_params = exported_config["init_params"] + assert exported_init_params["a"][0] == 123 + assert exported_init_params["b"][0][0] == 12.4 + assert exported_init_params["c"][0][0][0] == "ala" + assert exported_init_params["c"][0][0][1] == "ma" + assert exported_init_params["c"][0][0][2] == "kota" + assert exported_init_params["c"][0][1] == "kot ma" + assert exported_init_params["c"][1] == "ale" + assert exported_init_params["d"] == None + + @pytest.mark.unit + def test_nested_dict_export(self, tmpdir): + """ + Tests whether (nested*) dictionaries are properly exported. + + Args: + tmpdir: Fixture which will provide a temporary directory. + """ + + # Params: dict, dict with list, dict with dict, build-in. + module = TestNeuralModuleExport.MockupSimpleModule( + a={"int": 123}, b={"floats": [12.4, 71.2]}, c={"ala": {"ma": "kota", "nie_ma": "psa"}}, d=True + ) + + # Generate filename in the temporary directory. + tmp_file_name = str(tmpdir.mkdir("export").join("nested_dict_export.yml")) + # Export. + module.export_to_config(tmp_file_name) + + # Check the resulting config file. + with open(tmp_file_name, 'r') as stream: + exported_config = YAML.load(stream) + + # Assert that it contains main sections: header and init params. + assert "header" in exported_config + assert "init_params" in exported_config + + # Check init params. 
+ exported_init_params = exported_config["init_params"] + assert exported_init_params["a"]["int"] == 123 + assert exported_init_params["b"]["floats"][0] == 12.4 + assert exported_init_params["b"]["floats"][1] == 71.2 + assert exported_init_params["c"]["ala"]["ma"] == "kota" + assert exported_init_params["c"]["ala"]["nie_ma"] == "psa" + assert exported_init_params["d"] + + @pytest.mark.unit + def test_unallowed_export(self, tmpdir): + """ + Tests whether unallowed types are NOT exported. + + Args: + tmpdir: Fixture which will provide a temporary directory. + """ + + e = Exception("some random object") + + # Params: dict, dict with list, dict with dict, build-in. + module = TestNeuralModuleExport.MockupSimpleModule(e, False, False, False) + + # Generate filename in the temporary directory. + tmp_file_name = str(tmpdir.mkdir("export").join("unallowed_export.yml")) + # Assert export error. + with pytest.raises(ValueError): + module.export_to_config(tmp_file_name) diff --git a/tests/unit/core/neural_module/test_module_configuration_import.py b/tests/unit/core/neural_module/test_module_configuration_import.py new file mode 100644 index 000000000000..e6df548d3590 --- /dev/null +++ b/tests/unit/core/neural_module/test_module_configuration_import.py @@ -0,0 +1,138 @@ +# ! /usr/bin/python +# -*- coding: utf-8 -*- + +# ============================================================================= +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +import pytest + +from nemo.core import NeuralModule + + +@pytest.mark.usefixtures("neural_factory") +class TestNeuralModuleImport: + """ + Class testing Neural Module configuration export. + """ + + class FirstSimpleModule(NeuralModule): + """ + Mockup component class. + """ + + def __init__(self, a, b, c, d): + super().__init__() + + class SecondSimpleModule(NeuralModule): + """ + Mockup component class. + """ + + def __init__(self, x, y): + super().__init__() + + def setup_method(self, method): + """ + Setup_method is invoked for every test method of a class. + Mocks up the classes. + """ + # Mockup abstract methods. + TestNeuralModuleImport.FirstSimpleModule.__abstractmethods__ = set() + TestNeuralModuleImport.SecondSimpleModule.__abstractmethods__ = set() + + @pytest.mark.unit + def test_simple_import_root_neural_module(self, tmpdir): + """ + Tests whether the Neural Module can instantiate a simple module by loading a configuration file. + + Args: + tmpdir: Fixture which will provide a temporary directory. + """ + + # params = {"int": 123, "float": 12.4, "string": "ala ma kota", "bool": True} + orig_module = TestNeuralModuleImport.FirstSimpleModule(123, 12.4, "ala ma kota", True) + + # Generate filename in the temporary directory. + tmp_file_name = str(tmpdir.mkdir("export").join("simple_import_root.yml")) + # Export. + orig_module.export_to_config(tmp_file_name) + + # Import and create the new object. 
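import_from_config is driven by the header written at export time: the full_spec field checked in test_simple_export records which class to instantiate, and the generic NeuralModule.import_from_config call right below resolves it before passing init_params to the constructor. Presumably the resolution amounts to something like the hypothetical helper here (the dotted-path format of full_spec is an assumption, not taken from the NeMo sources):

    import importlib

    def resolve_class(full_spec):
        # Hypothetical: turn a dotted "package.module.ClassName" spec into the class object.
        module_path, _, class_name = full_spec.rpartition(".")
        return getattr(importlib.import_module(module_path), class_name)

    # e.g. resolve_class("nemo.backends.pytorch.tutorials.TaylorNet")(dim=4)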
+ new_module = NeuralModule.import_from_config(tmp_file_name) + + # Compare class types. + assert type(orig_module).__name__ == type(new_module).__name__ + + # Compare objects - by its all params. + param_keys = orig_module.init_params.keys() + for key in param_keys: + assert orig_module.init_params[key] == new_module.init_params[key] + + @pytest.mark.unit + def test_simple_import_leaf_module(self, tmpdir): + """ + Tests whether a particular module can instantiate another + instance (a copy) by loading a configuration file. + + Args: + tmpdir: Fixture which will provide a temporary directory. + """ + + # params = {"int": 123, "float": 12.4, "string": "ala ma kota", "bool": True} + orig_module = TestNeuralModuleImport.FirstSimpleModule(123, 12.4, "ala ma kota", True) + + # Generate filename in the temporary directory. + tmp_file_name = str(tmpdir.mkdir("export").join("simple_import_leaf.yml")) + # Export. + orig_module.export_to_config(tmp_file_name) + + # Import and create the new object. + new_module = TestNeuralModuleImport.FirstSimpleModule.import_from_config(tmp_file_name) + + # Compare class types. + assert type(orig_module).__name__ == type(new_module).__name__ + + # Compare objects - by its all params. + param_keys = orig_module.init_params.keys() + for key in param_keys: + assert orig_module.init_params[key] == new_module.init_params[key] + + @pytest.mark.unit + def test_incompatible_import_leaf_module(self, tmpdir): + """ + Tests whether a particular module can instantiate another + instance (a copy) by loading a configuration file. + + Args: + tmpdir: Fixture which will provide a temporary directory. + """ + + # params = {"int": 123, "float": 12.4, "string": "ala ma kota", "bool": True} + orig_module = TestNeuralModuleImport.SecondSimpleModule(["No", "way", "dude!"], None) + + # Generate filename in the temporary directory. + tmp_file_name = str(tmpdir.mkdir("export").join("incompatible_import_leaf.yml")) + # Export. + orig_module.export_to_config(tmp_file_name) + + # This will actuall create an instance of SecondSimpleModule - OK. + new_module = NeuralModule.import_from_config(tmp_file_name) + # Compare class types. + assert type(orig_module).__name__ == type(new_module).__name__ + + # This will create an instance of SecondSimpleModule, not FirstSimpleModule - SO NOT OK!! + with pytest.raises(ImportError): + _ = TestNeuralModuleImport.FirstSimpleModule.import_from_config(tmp_file_name) diff --git a/tests/core/test_neural_modules_pytorch.py b/tests/unit/core/neural_module/test_module_initialization.py similarity index 55% rename from tests/core/test_neural_modules_pytorch.py rename to tests/unit/core/neural_module/test_module_initialization.py index d0cfbc44c62b..4814cdfadd98 100644 --- a/tests/core/test_neural_modules_pytorch.py +++ b/tests/unit/core/neural_module/test_module_initialization.py @@ -17,85 +17,71 @@ # limitations under the License. 
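The same export/import round-trip works for real modules, not just the mocks; a sketch with TaylorNet from the tutorials collection (the file name is arbitrary, and TaylorNet is used here only because its constructor arguments are exportable built-ins):

    from nemo.backends.pytorch.tutorials import TaylorNet

    original = TaylorNet(dim=4)
    original.export_to_config("taylor_net.yml")              # arbitrary path

    # Importing through the base class resolves TaylorNet from the config header...
    restored = NeuralModule.import_from_config("taylor_net.yml")
    assert type(restored).__name__ == "TaylorNet"
    assert restored.init_params["dim"] == 4

    # ...whereas importing through an unrelated class raises ImportError,
    # exactly as test_incompatible_import_leaf_module above demonstrates.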
# ============================================================================= -# TODO: These test look bad/useless - redo +from unittest import TestCase -import unittest +import pytest -import nemo from nemo.backends.pytorch.nm import TrainableNM +from nemo.backends.pytorch.tutorials import TaylorNet +from nemo.core.neural_modules import NmTensor from nemo.core.neural_types import ChannelType, NeuralType -from tests.common_setup import NeMoUnitTest -class TestNM1(TrainableNM): - def __init__(self, var1=1, var2=2, var3=3): - super(TestNM1, self).__init__() +@pytest.mark.usefixtures("neural_factory") +class ModuleInitializationTestCase(TestCase): + class TestNM1(TrainableNM): + def __init__(self, var1=1, var2=2, var3=3): + super().__init__() + class TestNM2(TestNM1): + def __init__(self, var2): + super().__init__(var2=var2) -class TestNM2(TestNM1): - def __init__(self, var2): - super(TestNM2, self).__init__(var2=var2) - - -class TestNeuralModulesPT(NeMoUnitTest): def setUp(self) -> None: super().setUp() # Mockup abstract methods. - TestNM1.__abstractmethods__ = set() - TestNM2.__abstractmethods__ = set() + ModuleInitializationTestCase.TestNM1.__abstractmethods__ = set() + ModuleInitializationTestCase.TestNM2.__abstractmethods__ = set() + @pytest.mark.unit def test_default_init_params(self): - simple_nm = TestNM1(var1=1) + simple_nm = ModuleInitializationTestCase.TestNM1(var1=1) init_params = simple_nm.init_params self.assertEqual(init_params["var1"], 1) self.assertEqual(init_params["var2"], 2) self.assertEqual(init_params["var3"], 3) + @pytest.mark.unit def test_simple_init_params(self): - simple_nm = TestNM1(var1=10, var3=30) + simple_nm = ModuleInitializationTestCase.TestNM1(var1=10, var3=30) init_params = simple_nm.init_params self.assertEqual(init_params["var1"], 10) self.assertEqual(init_params["var2"], 2) self.assertEqual(init_params["var3"], 30) + @pytest.mark.unit def test_nested_init_params(self): - simple_nm = TestNM2(var2="hello") + simple_nm = ModuleInitializationTestCase.TestNM2(var2="hello") init_params = simple_nm.init_params self.assertEqual(init_params["var2"], "hello") + @pytest.mark.unit def test_constructor_TaylorNet(self): - tn = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) + tn = TaylorNet(dim=4) self.assertEqual(tn.init_params["dim"], 4) + @pytest.mark.unit def test_call_TaylorNet(self): - x_tg = nemo.core.neural_modules.NmTensor( + x_tg = NmTensor( producer=None, producer_args=None, - name=None, + output_port_name=None, ntype=NeuralType(elements_type=ChannelType(), axes=('B', 'D')), ) - tn = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) + tn = TaylorNet(dim=4) # note that real port's name: x was used y_pred = tn(x=x_tg) self.assertEqual(y_pred.producer, tn) self.assertEqual(y_pred.producer_args.get("x"), x_tg) - - def test_simple_chain(self): - data_source = nemo.backends.pytorch.tutorials.RealFunctionDataLayer(n=10000, batch_size=1) - trainable_module = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) - loss = nemo.backends.pytorch.tutorials.MSELoss() - x, y = data_source() - y_pred = trainable_module(x=x) - loss_tensor = loss(predictions=y_pred, target=y) - - # check producers' bookkeeping - self.assertEqual(loss_tensor.producer, loss) - self.assertEqual(loss_tensor.producer_args, {"predictions": y_pred, "target": y}) - self.assertEqual(y_pred.producer, trainable_module) - self.assertEqual(y_pred.producer_args, {"x": x}) - self.assertEqual(y.producer, data_source) - self.assertEqual(y.producer_args, {}) - self.assertEqual(x.producer, data_source) - 
self.assertEqual(x.producer_args, {}) diff --git a/tests/unit/core/tensorrt_format.py b/tests/unit/core/tensorrt_format.py new file mode 100644 index 000000000000..4915d9873d9f --- /dev/null +++ b/tests/unit/core/tensorrt_format.py @@ -0,0 +1,122 @@ +# ! /usr/bin/python +# -*- coding: utf-8 -*- + +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +import enum + +from nemo import logging + + +# TRT does not include batch dimension. +class DataFormat(enum.IntEnum): + UNKNOWN = 0 + NW = 1 + NHW = 2 + CHW = 3 + NHWC = 4 + NCHW = 5 + + +def _generate_permutations(): + def is_invertible(perm): + return min(perm) >= 0 and max(perm) < len(perm) + + def inverse_permutation(perm): + inverse = [perm[index] for index in perm] + return inverse + + # Inverse permutations are generated automatically below. + # We use -1 to denote that a dummy dimension of 1 should be inserted in the convert function. + initial_permutations = { + (DataFormat.NCHW, DataFormat.NCHW): (0, 1, 2, 3), + (DataFormat.NHWC, DataFormat.NHWC): (0, 1, 2, 3), + (DataFormat.NHWC, DataFormat.NCHW): (0, 3, 1, 2), + (DataFormat.CHW, DataFormat.CHW): (0, 1, 2), + (DataFormat.NCHW, DataFormat.CHW): (1, 2, 3), + (DataFormat.NHWC, DataFormat.CHW): (3, 1, 2), + (DataFormat.NHW, DataFormat.CHW): (-1, 1, 2), + (DataFormat.NW, DataFormat.CHW): (-1, -1, 1), + } + permutations = {} + for (f1, f2), perm in initial_permutations.items(): + permutations[(f1, f2)] = perm + if is_invertible(perm): + permutations[(f2, f1)] = inverse_permutation(perm) + return permutations + + +# This class is responsible for deducing the format of a shape, +# and converting it to the desired format (specified as a DataFormat). +class FormatManager(object): + # Dict[Tuple[DataFormat, DataFormat], Tuple[int]] + # This provides the correct permutation for various data format conversions. + DATA_PERMUTATIONS = _generate_permutations() + + @staticmethod + def deduce_format(shape): + """ + Guesses the data format of a given shape. + + Args: + shape (Tuple[int]): The shape, including batch dimension. + + Returns: + DataFormat: The deduced data format. + """ + # The smaller this ratio, the closer a and b are. + def minmax_ratio(a, b): + return abs(max(a, b) / min(a, b)) + + # Assume all shapes include batch dimension + if len(shape) == 4: + # Typically, H and W are quite close, so if minmax_ratio(0, 1) > minmax_ratio(1, 2), then we assume CHW. + if minmax_ratio(shape[1], shape[2]) > minmax_ratio(shape[2], shape[3]): + return DataFormat.NCHW + return DataFormat.NHWC + elif len(shape) == 3: + return DataFormat.NHW + elif len(shape) == 2: + return DataFormat.NW + else: + logging.warning( + "Cannot deduce format for " + + str(shape) + + ". Currently only implemented for input_buffers with 1-3 non-batch dimensions. Please update this function!" 
+ ) + return DataFormat.UNKNOWN + + # Get the permutation required to transpose old_format to new_format + @staticmethod + def permutation(old_format, new_format): + return FormatManager.DATA_PERMUTATIONS[(old_format, new_format)] + + @staticmethod + def convert(shape, new_format): + """ + Permutes a shape from one format to another. + + Args: + shape (Tuple[int]): The shape to convert. + new_format (DataFormat): The desired format of the shape. + + Returns: + Tuple[int]: A new shape in the correct format. + """ + old_format = FormatManager.deduce_format(shape) + perm = FormatManager.permutation(old_format, new_format) + return [shape[index] if index != -1 else 1 for index in perm] diff --git a/tests/unit/core/tensorrt_loaders.py b/tests/unit/core/tensorrt_loaders.py new file mode 100644 index 000000000000..c8c0726d5848 --- /dev/null +++ b/tests/unit/core/tensorrt_loaders.py @@ -0,0 +1,416 @@ +# ! /usr/bin/python +# -*- coding: utf-8 -*- + +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +import time +import warnings +from collections import OrderedDict + +import numpy as np +import onnx +import tensorrt as trt + +from .tensorrt_format import FormatManager +from .tensorrt_runner import ( + DEFAULT_SHAPE_VALUE, + TRT_LOGGER, + TensorRTRunnerV2, + default_value, + find_in_dict, + get_input_metadata_from_profile, + is_dimension_dynamic, + is_shape_dynamic, + is_valid_shape_override, + send_on_queue, + write_timestamped, +) +from nemo import logging, logging_mode + + +def set_onnx_logging_level(sev): + if sev >= logging.INFO: + warnings.filterwarnings("ignore") + + +class BaseDataLoader(object): + """ + Responsible for fetching or generting input data for runners. + """ + + def __call__(self, index, input_metadata, input_example=None): + """ + Fetches or generates inputs. + + Args: + index (int): The index of inputs to fetch. For any given index, the inputs should always be the same. + input_metadata (OrderedDict[str, Tuple[np.dtype, Tuple[int]]]): Mapping of input names to their data types and shapes. + + Returns: + OrderedDict[str, np.ndarray]: Mapping of input names to numpy buffers containing data. + """ + raise NotImplementedError("BaseDataLoader is an abstract class") + + +class DefaultDataLoader(BaseDataLoader): + def __init__( + self, + seed=None, + default_shape_value=None, + default_shapes=None, + int_min=None, + int_max=None, + float_min=None, + float_max=None, + ): + """ + Optional Args: + seed (int): The seed to use when generating random inputs. + default_shape_value (int): The default value to use when a dimension is dynamic. + default_shapes (Dict[str, Tuple[int]]): A mapping of input names to their corresponding shapes. 
+ """ + self.seed = default_value(seed, int(time.time())) + self.default_shapes = default_value(default_shapes, {}) + self.default_shape_value = default_value(default_shape_value, DEFAULT_SHAPE_VALUE) + self.int_min = default_value(int_min, 1) + self.int_max = default_value(int_max, 25) + self.float_min = default_value(float_min, -1.0) + self.float_max = default_value(float_max, 1.0) + + def __call__(self, index, input_metadata, input_example=None): + logging.debug("Updating seed to: {:}".format(self.seed + index)) + rng = np.random.RandomState(self.seed + index) + + buffers = OrderedDict() + i = 0 + for name, (dtype, shape) in input_metadata.items(): + if input_example is not None and (not isinstance(input_example, tuple) or i < len(input_example)): + if isinstance(input_example, tuple): + static_shape = input_example[i].shape + elif isinstance(input_example, OrderedDict): + static_shape = tuple(input_example.values())[i].shape + else: + static_shape = [tuple(input_example.shape)] + elif is_shape_dynamic(shape): + if name in self.default_shapes: + static_shape = self.default_shapes[name] + else: + static_shape = [self.default_shape_value if is_dimension_dynamic(elem) else elem for elem in shape] + if static_shape != shape: + if not is_valid_shape_override(static_shape, shape): + logging.critical( + "Cannot override original shape: {:}, for input: {:} to {:}".format( + shape, name, static_shape + ) + ) + logging.warning( + "Input: {:}: Adjusted dynamic shape: {:} to: {:}".format(name, shape, static_shape), + mode=logging_mode.ONCE, + ) + else: + if name in self.default_shapes: + logging.warning( + "Will not override static shape: {:}, for input: {:}".format(shape, name), + mode=logging_mode.ONCE, + ) + static_shape = shape + + if input_example is not None and (not isinstance(input_example, tuple) or i < len(input_example)): + if isinstance(input_example, OrderedDict): + buffers[name] = list(input_example.values())[i].cpu() + else: + buffers[name] = input_example[i].cpu() if isinstance(input_example, tuple) else input_example.cpu() + elif np.issubdtype(dtype, np.integer): + buffers[name] = rng.randint(low=self.int_min, high=self.int_max, size=static_shape, dtype=dtype) + elif np.issubdtype(dtype, np.bool_): + buffers[name] = rng.randint(low=0, high=2, size=static_shape).astype(dtype) + else: + buffers[name] = ( + rng.random_sample(size=static_shape) * (self.float_max - self.float_min) + self.float_min + ).astype(dtype) + + buffers[name] = np.array( + buffers[name] + ) # To handle scalars. The above functions return a float if shape is (). + + # If the shape is 1D, and has a length equal to the rank of the provided default shape, it is + # likely to be a TRT shape tensor, and so should be overriden such that it's value (not shape) is the default shape. + is_shape_tensor = ( + (not is_shape_dynamic(shape)) + and (name in self.default_shapes) + and (len(shape) == 1) + and (shape[0] == len(self.default_shapes[name])) + ) + if is_shape_tensor: + buffers[name] = np.array(self.default_shapes[name], dtype=dtype) + logging.warning( + "Assuming {:} is a shape tensor. Setting to: {:}".format(name, buffers[name]), + mode=logging_mode.ONCE, + ) + i = i + 1 + + return buffers + + +# Caches data loaded by a DataLoader for use across multiple runners. 
+class DataLoaderCache(object): + def __init__(self, data_loader): + self.data_loader = data_loader + self.cache = {} # Dict[int, OrderedDict[str, np.ndarray]] + + def load(self, iteration, input_metadata, input_example=None): + """ + Load the specified iteration from the cache if present, or generate using the data loader. + + Args: + iteration (int): The iteration whose data to retrieve. + input_metadata (OrderedDict[str, Tuple[np.dtype, Tuple[int]]]): Input Metadata, including shape and type information. The loader may attempt to match input_metadata when data in the cache does not exactly match a new set of input_metadata. + """ + if iteration not in self.cache: + logging.debug("Iteration {:} not found in cache, generating new buffers for all inputs".format(iteration)) + self.cache[iteration] = self.data_loader(iteration, input_metadata, input_example) + if self.cache[iteration] is None: + logging.critical( + "Received no data from data_loader(iteration, input_metadata) for input_metadata: {:}".format( + input_metadata + ) + ) + else: + logging.info("Found iteration {:} in cache".format(iteration)) + + feed_dict = OrderedDict() + for index, (name, (dtype, shape)) in enumerate(input_metadata.items()): + cached_name = find_in_dict(name, self.cache[iteration], index) + if cached_name is None: + logging.warning("Could not find input: {:} in cache, regenerating buffers".format(name)) + self.cache[iteration] = self.data_loader(iteration, input_metadata, input_example) + cached_name = name + + buffer = self.cache[iteration][cached_name] + + if dtype != buffer.dtype: + logging.warning( + "Cached buffer data type does not match data type for input: {:}. Note: Cached type: {:}, input type: {:}. Attempting to cast".format( + name, buffer.dtype, dtype + ) + ) + buffer = buffer.astype(dtype) + + if not is_valid_shape_override(buffer.shape, shape): + logging.warning( + "Cached buffer shape does not match shape for input. Note: Cached shape: {:}, input shape: {:}.".format( + buffer.shape, shape + ) + ) + # Try to permute the shape to match + try: + perm = FormatManager.permutation( + FormatManager.deduce_format(buffer.shape), FormatManager.deduce_format(shape) + ) + new_shape = FormatManager.convert(tuple(buffer.shape), FormatManager.deduce_format(shape)) + logging.warning( + "Attempting to permute shape: {:} using permutation {:}. New shape: {:}".format( + buffer.shape, perm, new_shape + ) + ) + buffer = np.transpose(buffer, perm) + except NotImplementedError as err: + # If the FormatManager does not recognize the format, skip permutation. + logging.info("Skipping permutation due to {:}".format(err)) + except KeyError as err: + # If the FormatManager cannot generate the permutation for the format combination, skip permutation. + logging.info("Skipping permutation due to {:}".format(err)) + + feed_dict[name] = buffer + return feed_dict + + +class BaseModelLoader(object): + """ + Loads a model for a runner. + """ + + def __call__(self): + """ + Load the model. + + Returns: + A model usable by the runner. The return type is dependent on the runner the loader has been implemented for. + """ + raise NotImplementedError("BaseModelLoader is an abstract class") + + +class BaseOnnxModelLoader(BaseModelLoader): + def check(self, model): + try: + onnx.checker.check_model(model) + logging.debug("ONNX Checker Passed") + except onnx.checker.ValidationError as err: + logging.warning("ONNX Checker exited with an error: {:}".format(err)) + return model + + +# ONNX loaders return ONNX models in memory. 
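When a cached buffer's shape disagrees with the shape a runner asks for, load() above falls back on FormatManager from tensorrt_format.py to find a permutation. Its behaviour on a couple of concrete shapes (values picked arbitrarily):

    # Assumes tests/unit/core is importable as a package, as laid out in this change.
    from tests.unit.core.tensorrt_format import DataFormat, FormatManager

    # A 4D shape whose second dimension is much smaller than the spatial ones is read as NCHW.
    assert FormatManager.deduce_format((1, 3, 224, 224)) == DataFormat.NCHW
    # Dropping the batch dimension uses permutation (1, 2, 3).
    assert FormatManager.convert((1, 3, 224, 224), DataFormat.CHW) == [3, 224, 224]
    # A 3D NHW shape gains a dummy channel dimension (the -1 entry in the permutation table).
    assert FormatManager.convert((8, 100, 80), DataFormat.CHW) == [1, 100, 80]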
+class OnnxFileLoader(BaseOnnxModelLoader): + def __init__(self, path): + """ + Loads an ONNX model from a file. + + Args: + path (str): The path from which to load the model. + """ + self.path = path + + def __call__(self): + logging.info("Loading {:}".format(self.path)) + return self.check(onnx.load(self.path)) + + def __str__(self): + return "ONNX Model Loader: {:}".format(self.path) + + def __repr__(self): + return self.__str__() + + +class OnnxNetworkLoader(BaseModelLoader): + def __init__(self, onnx_loader, explicit_precision=None): + """ + Parses an ONNX model to create an engine. + + Args: + onnx_loader (Callable() -> onnx.ModelProto): A loader that can supply an ONNX model. + + Optional Args: + explicit_precision (bool): Whether to create the network with explicit precision enabled. + """ + self.onnx_loader = onnx_loader + self.explicit_precision = default_value(explicit_precision, False) + + def __call__(self): + network = TensorRTRunnerV2.create_network(explicit_precision=self.explicit_precision) + + parser = trt.OnnxParser(network, TRT_LOGGER) + success = parser.parse(self.onnx_loader().SerializeToString()) + if not success: + for index in range(parser.num_errors): + logging.error(parser.get_error(index)) + logging.critical("Could not parse ONNX correctly") + + return network, parser + + +class BuildEngineLoader(BaseModelLoader): + def __init__( + self, + network_loader, + max_workspace_size=None, + fp16_mode=None, + int8_mode=None, + profile_shapes=None, + write_engine=None, + calibrator=None, + preprocess_network=None, + layerwise=None, + ): + """ + Uses a TensorRT INetworkDefinition to build an engine + + Args: + network_loader (Callable()->trt.INetworkDefinition): A callable capable of returning an TensorRT INetworkDefinition. The returned network is owned by the BuildEngineLoader and should not be freed manually. The callable may have at most 2 return values if another object needs to be kept alive for the duration of the network, e.g., in the case of a parser. BuildEngineLoader will take ownership of the second return value, and, like the network, it should not be freed by the callable. The first return value must always be the network. + + Optional Args: + max_workspace_size (int): The maximum workspace size, in bytes, when building the engine. + fp16_mode (bool): Whether to build the engine with fp16 mode enabled. + int8_mode (bool): Whether to build the engine with int8 mode enabled. + profile_shapes (Dict[str, List[shape, shape, shape]]): A mapping of binding name to min/opt/max shapes. Only needed for networks with dynamic input shapes. + write_engine (str): A directory in which to save the engine. + calibrator (trt_smeagol.runners.tensorrt_runner_v2.Calibrator): An int8 calibrator. Only required in int8 mode when the network does not have explicit precision. + preprocess_network (Callable(trt.INetworkDefinition)): Preprocessing function for the network definition. May be used to modify the network after parsing. This is called before enabling layerwise outputs. + layerwise (bool): Whether to treat the output of every layer as an output of the network. Defaults to False. 
+ """ + self.network_loader = network_loader + self.max_workspace_size = default_value(max_workspace_size, 1 << 24) + self.fp16_mode = default_value(fp16_mode, False) + self.int8_mode = default_value(int8_mode, False) + self.profile_shapes = default_value(profile_shapes, OrderedDict()) + self.write_engine = write_engine + self.written_engine_path = None + self.calibrator = calibrator + self.preprocess_network = default_value(preprocess_network, None) + self.layerwise = default_value(layerwise, False) + + def __call__(self): + class DummyContextManager(object): + def __enter__(self): + return None + + def __exit__(self, exc_type, exc_value, traceback): + return None + + network_parser = self.network_loader() + try: + network, parser = network_parser + assert isinstance(network, trt.INetworkDefinition) + except (ValueError, AssertionError): + network = network_parser + parser = DummyContextManager() + + with trt.Builder(TRT_LOGGER) as builder, network, parser: + if self.preprocess_network: + logging.debug("Applying network preprocessing: {:}".format(self.preprocess_network)) + self.preprocess_network(network) + + if self.layerwise: + TensorRTRunnerV2.mark_layerwise(network) + + if logging.getEffectiveLevel() <= logging.DEBUG: + TensorRTRunnerV2.log_network(network) + + config = builder.create_builder_config() + profile = TensorRTRunnerV2.build_profile(builder, network, self.profile_shapes) + config.add_optimization_profile(profile) + + config.max_workspace_size = int(self.max_workspace_size) + if self.fp16_mode: + config.flags = 1 << int(trt.BuilderFlag.FP16) + if self.int8_mode: + config.flags = config.flags | 1 << int(trt.BuilderFlag.INT8) + if not network.has_explicit_precision: + if not self.calibrator: + logging.critical( + "Network does not have explicit precision. A calibrator must be provided in order to use int8 mode." + ) + self.calibrator.set_input_metadata(get_input_metadata_from_profile(profile, network)) + config.int8_calibrator = self.calibrator + + logging.debug("Using builder configuration flags: {:}".format(config.flags)) + logging.info( + "Building engine: max workspace size={:} bytes, fp16={:}, int8={:}, layerwise={:}".format( + self.max_workspace_size, self.fp16_mode, self.int8_mode, self.layerwise + ) + ) + engine = builder.build_engine(network, config) + self.written_engine_path = write_timestamped( + contents=lambda: engine.serialize(), dir=self.write_engine, name="tensorrt_runner_v2.engine" + ) + return engine + + def get_engine_path(self): + """ + Returns the path at which the engine was written, or None if write_engine was not specified. + """ + return self.written_engine_path diff --git a/tests/unit/core/tensorrt_runner.py b/tests/unit/core/tensorrt_runner.py new file mode 100644 index 000000000000..afce208ab2e7 --- /dev/null +++ b/tests/unit/core/tensorrt_runner.py @@ -0,0 +1,720 @@ +# ! /usr/bin/python +# -*- coding: utf-8 -*- + +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +# Sets up everything needed to perform inference with TensorRT. +import os +import pickle +import sys +import time +import zlib +from collections import OrderedDict + +import numpy as np + +# Only initialize GPU after this runner is activated. +import pycuda.autoinit + +# This import causes pycuda to automatically manage CUDA context creation and cleanup. +import pycuda.driver as cuda +import tensorrt as trt + +from nemo import logging, logging_mode + +logging.info("Using TensorRT {:}".format(trt.__version__)) +logging.debug("Note: Using tensorrt from {:}".format(trt.__path__)) + +TRT_LOGGER = trt.Logger(trt.Logger.WARNING) + + +def set_trt_logging_level(sev): + global TRT_LOGGER + if sev == logging.DEBUG: + logging.min_severity = trt.Logger.INFO + elif sev == logging.WARNING: + logging.min_severity = trt.Logger.WARNING + elif sev == logging.ERROR: + logging.min_severity = trt.Logger.ERROR + elif sev == logging.CRITICAL: + logging.min_severity = trt.Logger.INTERNAL_ERROR + + +TRT_DYNAMIC_DIM = -1 +DEFAULT_SHAPE_VALUE = 1 + + +# Attempt to partially match output names. Returns None on failure +# Checks for exact matches and substring matches, falling back to index based matching. +def find_in_dict(name, map, index=None): + if name in map: + return name + for key in map.keys(): + if name.lower() in key.lower() or key.lower() in name.lower(): + return key + if index is not None and index >= 0 and index < len(map.keys()): + return list(map.keys())[index] + return None + + +def default_value(value, default): + return value if value is not None else default + + +def is_dimension_dynamic(dim): + return dim is None or dim <= 0 + + +def is_shape_dynamic(shape): + return any([is_dimension_dynamic(dim) for dim in shape]) + + +def is_valid_shape_override(new_shape, original_shape): + ranks_same = len(original_shape) == len(new_shape) + overrides_valid = all( + [odim == ndim or is_dimension_dynamic(odim) for odim, ndim in zip(original_shape, new_shape)] + ) + return ranks_same and overrides_valid + + +def volume(obj): + vol = 1 + for elem in obj: + vol *= elem + return vol + + +def compress(info): + return zlib.compress(pickle.dumps(info)) + + +def decompress(bytes): + return pickle.loads(zlib.decompress(bytes)) + + +def is_compressed(obj): + return isinstance(obj, bytes) + + +def is_pickleable(obj): + try: + pickle.dumps(obj) + return True + except TypeError: + return False + + +def pickle_load(path): + with open(path, "rb") as f: + return pickle.loads(f.read()) + + +def pickle_save(path, obj): + with open(path, "wb") as f: + return f.write(pickle.dumps(obj)) + + +# The maximum number of bytes that can be sent at once over a queue. +PIPE_MAX_SEND_BYTES = 1 << 31 + +# Attempts to send an object over the queue, compresses if needed. In the event the object cannot be sent, sends None instead. +def send_on_queue(queue, obj): + if not is_pickleable(obj): + logging.warning("Cannot pickle: {:}. Sending None instead".format(obj)) + queue.put(None) + return + + if sys.getsizeof(obj) > PIPE_MAX_SEND_BYTES: + logging.warning( + "Object size ({:} bytes) exceeds maximum size that can be sent over queues ({:} bytes). Attempting to compress - this may take some time. 
If this does not work or you want to avoid the compression overhead, you should disable subprocesses via the --no-subprocess flag, or by setting use_subprocess=False in Comparator.run().".format( + sys.getsizeof(obj), PIPE_MAX_SEND_BYTES + ) + ) + obj = compress(obj) + + if sys.getsizeof(obj) > PIPE_MAX_SEND_BYTES: + logging.warning("Compressed object is still too large to send. Sending None instead.") + queue.put(None) + return + + logging.info("Sending: {:} on queue".format(obj)) + queue.put(obj) + + +def receive_on_queue(queue, timeout=None): + logging.info("Waiting for data to become available on queue") + obj = queue.get(block=True, timeout=timeout) + if is_compressed(obj): + logging.debug("Decompressing output") + obj = decompress(obj) + logging.info("Received {:} on queue".format(obj)) + return obj + + +def timestamped_filepath(dir, name): + name, ext = os.path.splitext(name) + return os.path.join(dir, "{:}.{:}{:}".format(name, time.strftime("%Y-%m-%d-%H-%M-%S"), ext)) + + +def write_timestamped(contents, dir=None, name=None, mode="wb"): + """ + Generates a timestamped file path in the specified directory. + + Args: + contents (bytes-like object or callable): Either a bytes-like object that can be written to disk, or a callable which will return such an object. + dir (str): The directory to write into. + name (str): The name of the file. + + Optional Args: + mode(str): The mode to use when writing. Defaults to "wb". + + Returns: + str: The complete file path, or None if nothing was written. + """ + if dir is not None: + if not os.path.exists(dir): + # logging.debug("{:} does not exist, creating now.".format(dir)) + os.makedirs(dir, exist_ok=True) + + path = timestamped_filepath(dir, name) + + if callable(contents): + contents = contents() + + if os.path.exists(path): + logging.warning("{:} already exists. Will not overwrite.".format(path)) + else: + with open(path, mode) as f: + logging.info("Writing to {:}".format(path)) + f.write(contents) + return path + return None + + +def get_input_metadata_from_profile(profile, network): + input_metadata = OrderedDict() + for index in range(network.num_inputs): + tensor = network.get_input(index) + if tensor.is_shape_tensor: + shapes = profile.get_shape_input(tensor.name) + else: + shapes = profile.get_shape(tensor.name) + if tuple(shapes[0]) != tuple(shapes[1]): + logging.warning("In profile 0, min != max, using opt shapes for calibration") + # Always use opt shape + input_metadata[tensor.name] = (trt.nptype(tensor.dtype), shapes[1]) + return input_metadata + + +class BaseBuffer(object): + def __init__(self, shape=None, dtype=None): + self.dtype = default_value(dtype, np.float32) + self.shape = default_value(shape, tuple()) + self.allocate(self.shape) + + # If the new shape is larger, reallocate, otherwise do nothing. + def resize(self, new_shape): + if volume(new_shape) > volume(self.shape): + self.free() + self.allocate(new_shape) + self.shape = new_shape + + def __str__(self): + return "({:}: shape={:}, dtype={:})".format(type(self).__name__, self.shape, self.dtype) + + def __repr__(self): + return self.__str__() + + +class DeviceBuffer(BaseBuffer): + def allocate(self, shape): + self.ptr = cuda.mem_alloc(volume(shape) * np.dtype(self.dtype).itemsize) + + def free(self): + self.ptr.free() + + # Copies a numpy buffer to device + def copy_htod(self, np_buffer, stream=None): + if stream: + # PyCUDA requires the host buffer to be pagelocked for asynchronous memcpys. 
+ pagelocked = cuda.register_host_memory(np.ascontiguousarray(np_buffer.ravel())) + cuda.memcpy_htod_async(self.ptr, pagelocked, stream) + else: + cuda.memcpy_htod(self.ptr, np.ascontiguousarray(np_buffer.ravel())) + + +class HostBuffer(BaseBuffer): + def allocate(self, shape): + self.ptr = cuda.pagelocked_empty(shape, self.dtype).ravel() + + def free(self): + del self.ptr + + # Copies a DeviceBuffer to host + def copy_dtoh(self, device_buffer, stream=None): + if stream: + cuda.memcpy_dtoh_async(self.ptr, device_buffer.ptr, stream) + else: + cuda.memcpy_dtoh(self.ptr, device_buffer.ptr) + + # Return a view of the buffer which has the correct shape + def view(self): + return self.ptr[: volume(self.shape)].reshape(self.shape) + + +class Buffers(object): + @staticmethod + def from_engine(engine): + buffers = Buffers() + for binding in engine: + dtype = trt.nptype(engine.get_binding_dtype(binding)) + buffers.device_buffers[binding] = DeviceBuffer(dtype=dtype) + if not engine.binding_is_input(binding): + buffers.host_outputs[binding] = HostBuffer(dtype=dtype) + return buffers + + def __init__(self): + self.device_buffers = OrderedDict() + self.host_outputs = OrderedDict() + + # Resize the specfied buffer to the specified shape + def resize(self, name, shape): + found = False + for buf_dict in [self.device_buffers, self.host_outputs]: + if name in buf_dict: + found = True + buf_dict[name].resize(shape) + + if not found: + logging.warning("Buffer: {:} was not found, could not resize".format(name)) + else: + logging.debug("Resizing {:} buffer to {:}".format(name, shape)) + + def copy_inputs(self, feed_dict, stream=None): + for name, buffer in feed_dict.items(): + self.device_buffers[name].copy_htod(buffer, stream) + + # Copies outputs from the device back to host. + def copy_outputs(self, stream=None): + for name, buffer in self.host_outputs.items(): + buffer.copy_dtoh(self.device_buffers[name], stream) + + def get_bindings(self): + return [int(buf.ptr) for buf in self.device_buffers.values()] + + # Gets a dictionary mapping names to numpy buffers. + def get_outputs(self): + out_dict = OrderedDict() + for name, buffer in self.host_outputs.items(): + out_dict[name] = buffer.view() + return out_dict + + def free(self): + [buf.free() for buf in self.device_buffers.values()] + [buf.free() for buf in self.host_outputs.values()] + + +class BaseRunner(object): + def __init__(self, name=None): + """ + The base class for runner objects. All runners should override the functions and attributes specified here. + + Vars: + name (str): The name of this runner. + """ + self.name = default_value(name, "Runner") + self.inference_time = None + + def __enter__(self): + """ + Activate the runner for inference. This may involve allocating GPU buffers, for example. + + It is extremely important that the GPU is not used by the runner before the __enter__ function is called. + + Vars: + inputs (OrderedDict[InputKey, Tuple[int]]): A mapping of input tensor names to their shapes, INCLUDING batch dimension, for this runner. This MUST be known at runner initialization for the Comparator to work correctly. InputKey can be any type used to uniquely indentify an input, e.g. a string containing the input name. + """ + return self + + def __exit__(self, exc_type, exc_value, traceback): + """ + Deactivate the runner. All memory allocated by __enter__() should be freed, such that the runner is no longer using the GPU. 
+ + Returns: + BaseRunner + """ + pass + + def infer(self, feed_dict, output): + """ + Runs inference using the provided feed_dict. + + Args: + feed_dict (OrderedDict[str, np.ndarray]): A mapping of input tensor names to corresponding input numpy arrays. + + Returns: + OrderedDict[str, np.ndarray]: A mapping of output tensor names to their corresponding numpy arrays. + """ + raise NotImplementedError("BaseRunner is an abstract class") + + # def last_inference_time(self): + # """ + # Returns the time required for the last call to `infer` + # + # Returns: + # float: The time in seconds + # """ + # if self.inference_time is None: + # logging.warning("inference_time was not set for this runner. Inference time will be incorrect! To correctly compare runtimes, please set the inference_time property in the infer() function", mode=LogMode.ONCE) + # return 1 + # return self.inference_time + + def get_input_metadata(self, input_examples, output_example): + """ + Returns information about the inputs of the model. Shapes here may be unknown/dynamic. Must be called after __enter__() and before __exit__() + + Returns: + OrderedDict[str, Tuple[np.dtype, Tuple[int]]]: Mapping of input names to their data types and shapes. + """ + raise NotImplementedError("BaseRunner is an abstract class") + + +# Builds and tracks a single engine for a single network. +class TensorRTRunnerV2(BaseRunner): + total_runners = 0 + """ + A runner that can perform inference on a single TensorRT engine. + """ + + def __init__(self, model_loader=None, plugins=None, name=None): + """ + Creates a runner that manages a single TensorRT engine. + + Args: + model_loader (Callable() -> trt.ICudaEngine): A callable that can supply a TensorRT engine. + + Optional Args: + max_workspace_size (int): The maximum workspace size in bytes. + plugins (List[str]): A list of paths to plugin libraries to load before inference. + name (str): The human-readable name to use for this runner. + """ + set_trt_logging_level(logging.getEffectiveLevel()) + + def load_plugins(): + import ctypes + + for plugin in plugins: + path = os.path.abspath(plugin) + logging.info("Loading plugin library: {:}".format(path)) + ctypes.CDLL(path) + + # Load any user-supplied plugin libraries. This must happen before everything else, including engine deserialization. + if plugins: + load_plugins() + + # Choose a unique name for this runner. + super().__init__(default_value(name, "trt-v2-runner-{:}".format(TensorRTRunnerV2.total_runners))) + TensorRTRunnerV2.total_runners += 1 + logging.debug("Creating {:}".format(self.name)) + + self.model_loader = model_loader + + self.engine = self.model_loader() + if not self.engine: + logging.critical("Invalid Engine. Please ensure the engine was built correctly.") + + self.buffers = Buffers.from_engine(self.engine) + self.stream = cuda.Stream() + + self.context = self.engine.create_execution_context() + + def __enter__(self): + """ + Vars: + engine (trt.ICudaEngine): The engine tracked by this runner. The TensorRTRunnerV2 OWNS the engine it manages, and therefore is responsible for it's destruction. Do not free the engine outside of the runner, or it will result in a double free. + context (trt.IExecutionContext): The context used for inference. + stream (pycuda.driver.Stream): The CUDA stream that this runner will use for inference. 
+ """ + return self + + @staticmethod + def override_shape_list(shape): + return [DEFAULT_SHAPE_VALUE if is_dimension_dynamic(dim) else dim for dim in shape] + + def get_input_metadata(self): + inputs = OrderedDict() + active_profile = self.context.active_optimization_profile + bindings_per_profile = len(self.engine) // self.engine.num_optimization_profiles + logging.debug( + "Total # of Profiles: {:}, Bindings Per Profile: {:}, Active Profile: {:}".format( + self.engine.num_optimization_profiles, bindings_per_profile, active_profile + ) + ) + + start_binding = bindings_per_profile * active_profile + end_binding = start_binding + bindings_per_profile + logging.info("Start Binding: {:}, End Binding: {:}".format(start_binding, end_binding)) + + for binding in range(start_binding, end_binding): + if self.engine.binding_is_input(binding): + inputs[self.engine[binding]] = ( + trt.nptype(self.engine.get_binding_dtype(binding)), + list(self.engine.get_binding_shape(binding)), + ) + return inputs + + def __exit__(self, exc_type, exc_value, traceback): + # Destroy the engine, and context. + with self.engine, self.context: + pass + + self.buffers.free() + del self.stream + + def infer(self, feed_dict, output): + for name in self.engine: + if name in feed_dict: + in_out = [feed_dict[name]] + elif isinstance(output, tuple): + in_out = [output[i].detach().cpu().numpy() for i in range(len(output))] + else: + in_out = [output.detach().cpu().numpy()] + + binding = self.engine[name] + + # Only set shapes if required + for i in range(len(in_out)): + shape = in_out[i].shape + if self.engine.is_shape_binding(binding) and is_shape_dynamic(self.context.get_shape(binding)): + logging.debug("Setting shape binding: {:} (index: {:}) to: {:}".format(name, binding, in_out[i])) + self.context.set_shape_input(binding, in_out[i]) + elif is_shape_dynamic(self.context.get_binding_shape(binding)): + logging.debug("Setting binding: {:} (index: {:}) to shape: {:}".format(name, binding, shape)) + self.context.set_binding_shape(binding, shape) + + # Check + if not self.context.all_binding_shapes_specified: + logging.critical( + "Some input shapes were not specified.\nNote: Inputs are: {:}".format(self.get_input_metadata()) + ) + if not self.context.all_shape_inputs_specified: + logging.critical( + "Some shape inputs were not specified.\nNote: Inputs are: {:}".format(self.get_input_metadata()) + ) + + bindings_per_profile = self.engine.num_bindings // self.engine.num_optimization_profiles + start_binding = self.context.active_optimization_profile * bindings_per_profile + end_binding = start_binding + bindings_per_profile + + # Resize buffers so they are the appropriate size. + for binding in range(start_binding, end_binding): + shape = tuple(self.context.get_binding_shape(binding)) + self.buffers.resize(self.engine[binding], shape) + + bindings = self.buffers.get_bindings() + + start = time.perf_counter() + self.buffers.copy_inputs(feed_dict, self.stream) + self.context.execute_async_v2(bindings=bindings, stream_handle=self.stream.handle) + self.buffers.copy_outputs(self.stream) + self.stream.synchronize() + end = time.perf_counter() + + self.inference_time = end - start + return self.buffers.get_outputs() + + # Utility functions related to TensorRT, but not tied to any specific instance. 
+ @staticmethod + def create_network(explicit_batch=True, explicit_precision=False): + with trt.Builder(TRT_LOGGER) as builder: + network_flags = 0 + if explicit_batch: + network_flags = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + if explicit_precision: + network_flags = network_flags | (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_PRECISION)) + network = builder.create_network(flags=network_flags) + if network is None: + logging.critical("Invalid network") + return network + + @staticmethod + def get_network_inputs(network): + return {network.get_input(i).name: network.get_input(i).shape for i in range(network.num_inputs)} + + @staticmethod + def log_network(network): + LAYER_TYPE_CLASS_MAPPING = { + trt.LayerType.CONVOLUTION: trt.IConvolutionLayer, + trt.LayerType.FULLY_CONNECTED: trt.IFullyConnectedLayer, + trt.LayerType.ACTIVATION: trt.IActivationLayer, + trt.LayerType.POOLING: trt.IPoolingLayer, + trt.LayerType.LRN: trt.ILRNLayer, + trt.LayerType.SCALE: trt.IScaleLayer, + trt.LayerType.SOFTMAX: trt.ISoftMaxLayer, + trt.LayerType.DECONVOLUTION: trt.IDeconvolutionLayer, + trt.LayerType.CONCATENATION: trt.IConcatenationLayer, + trt.LayerType.ELEMENTWISE: trt.IElementWiseLayer, + trt.LayerType.PLUGIN: trt.IPluginLayer, + trt.LayerType.RNN: trt.IRNNLayer, + trt.LayerType.UNARY: trt.IUnaryLayer, + trt.LayerType.PADDING: trt.IPaddingLayer, + trt.LayerType.SHUFFLE: trt.IShuffleLayer, + trt.LayerType.REDUCE: trt.IReduceLayer, + trt.LayerType.TOPK: trt.ITopKLayer, + trt.LayerType.GATHER: trt.IGatherLayer, + trt.LayerType.MATRIX_MULTIPLY: trt.IMatrixMultiplyLayer, + trt.LayerType.RAGGED_SOFTMAX: trt.IRaggedSoftMaxLayer, + trt.LayerType.CONSTANT: trt.IConstantLayer, + trt.LayerType.RNN_V2: trt.IRNNv2Layer, + trt.LayerType.IDENTITY: trt.IIdentityLayer, + trt.LayerType.PLUGIN_V2: trt.IPluginV2Layer, + trt.LayerType.SLICE: trt.ISliceLayer, + trt.LayerType.SHAPE: trt.IShapeLayer, + trt.LayerType.PARAMETRIC_RELU: trt.IParametricReLULayer, + trt.LayerType.RESIZE: trt.IResizeLayer, + } + + def is_special_attribute(attr): + return attr.startswith("__") and attr.endswith("__") + + def is_valid_attribute(attr, layer): + if ( + type(layer) == trt.IPoolingLayer + or type(layer) == trt.IConvolutionLayer + or type(layer) == trt.IDeconvolutionLayer + ): + if len(layer.get_input(0).shape) > 4: + # 3D pooling uses padding_nd + return attr not in ["padding", "stride", "window_size"] + if type(layer) == trt.IResizeLayer: + if layer.num_inputs > 1: + return attr not in ["scales"] + if type(layer) == trt.ISliceLayer: + if layer.num_inputs > 1: + return attr not in ["shape", "start", "stride"] + return True + + logging.debug("Network Inputs: {:}".format(TensorRTRunnerV2.get_network_inputs(network))) + for layer in network: + if layer.type in LAYER_TYPE_CLASS_MAPPING: + layer.__class__ = LAYER_TYPE_CLASS_MAPPING[layer.type] + input_info = [ + "{:}: {:} ({:})".format(layer.get_input(i).name, layer.get_input(i).shape, layer.get_input(i).dtype) + for i in range(layer.num_inputs) + if layer.get_input(i) + ] + output_info = [ + "{:}: {:} ({:})".format(layer.get_output(i).name, layer.get_output(i).shape, layer.get_output(i).dtype) + for i in range(layer.num_outputs) + if layer.get_output(i) + ] + logging.info("{:} [Op: {:}]".format(layer.name, layer.type)) + logging.info("\t{:} -> {:}".format(input_info, output_info)) + attrs = dir(layer) + for attr in attrs: + # Exclude special attributes, as well as any attributes of the base layer class (those can be displayed above). 
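+                    # is_valid_attribute() additionally skips attributes that TensorRT would reject for this
+                    # particular layer configuration (e.g. `padding`, `stride` and `window_size` on 3D
+                    # pooling/convolution layers, `scales` on a resize layer with a second input, and
+                    # `shape`/`start`/`stride` on a slice layer with extra inputs).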
+ if ( + not is_special_attribute(attr) + and not hasattr(trt.ILayer, attr) + and is_valid_attribute(attr, layer) + ): + logging.info("\t{:}.{:} = {:}".format(layer.name, attr, getattr(layer, attr))) + + network_outputs = {network.get_output(i).name: network.get_output(i).shape for i in range(network.num_outputs)} + logging.debug("Network Outputs: {:}".format(network_outputs)) + + @staticmethod + def mark_layerwise(network): + # Layers within loops cannot be marked as network outputs. + # TODO: FIXME: This assumes that the network is topologically sorted. + LOOP_START_LAYERS = [trt.LayerType.TRIP_LIMIT, trt.LayerType.ITERATOR] + LOOP_END_LAYERS = [trt.LayerType.LOOP_OUTPUT] + num_layers_marked = 0 + in_loop = False + for layer in network: + if layer.type in LOOP_START_LAYERS: + in_loop = True + elif layer.type in LOOP_END_LAYERS: + in_loop = False + for index in range(layer.num_outputs): + out = layer.get_output(index) + if not out.is_network_output and not in_loop: + logging.debug("Marking {:} as an output".format(out.name)) + network.mark_output(out) + num_layers_marked += 1 + logging.debug("Running in layerwise mode. Marking {:} layers as outputs".format(num_layers_marked)) + + @staticmethod + def build_profile(builder, network, profile_shapes, default_shape_value=DEFAULT_SHAPE_VALUE): + def override_shape(shape): + return tuple([DEFAULT_SHAPE_VALUE if is_dimension_dynamic(dim) else dim for dim in shape]) + + def get_profile_shape(name): + if name not in profile_shapes: + return None + shapes = profile_shapes[name] + if not isinstance(shapes, list) or len(shapes) != 3: + logging.critical( + "Profile values must be a list containing exactly 3 shapes (tuples or Dims), but received shapes: {:} for input: {:}.\nNote: profile was: {:}.\nNote: Network inputs were: {:}".format( + shapes, name, profile_shapes, TensorRTRunnerV2.get_network_inputs(network) + ) + ) + return shapes + + profile = builder.create_optimization_profile() + for idx in range(network.num_inputs): + inp = network.get_input(idx) + + if inp.is_shape_tensor: + shapes = get_profile_shape(inp.name) + if not shapes: + rank = inp.shape[0] + shapes = [(DEFAULT_SHAPE_VALUE,) * rank] * 3 + logging.warning( + "Setting shape input to {:}. If this is incorrect, for shape input: {:}, please provide tuples for min, opt, and max shapes containing {:} elements".format( + shapes[0], inp.name, rank + ), + mode=logging_mode.ONCE, + ) + min, opt, max = shapes + profile.set_shape(inp.name, min, opt, max) + inp.shape = opt + logging.info( + "Setting shape input: {:} values to min: {:}, opt: {:}, max: {:}".format(inp.name, min, opt, max) + ) + else: + shapes = get_profile_shape(inp.name) + if not shapes: + shapes = [override_shape(inp.shape)] * 3 + logging.warning( + "Overriding input shape {:} to {:}. If this is incorrect, for input tensor: {:}, please provide tuples for min, opt, and max shapes containing values: {:} with dynamic dimensions replaced,".format( + inp.shape, shapes[0], inp.name, inp.shape + ), + mode=logging_mode.ONCE, + ) + min, opt, max = shapes + profile.set_shape(inp.name, min, opt, max) + inp.shape = opt + logging.info( + "Setting input: {:} shape to min: {:}, opt: {:}, max: {:}".format(inp.name, min, opt, max) + ) + + if not profile: + logging.critical( + "Profile is not valid, please provide profile data. 
Note: profile was: {:}".format(profile_shapes) + ) + return profile diff --git a/tests/core/test_actions_api.py b/tests/unit/core/test_actions_api.py similarity index 79% rename from tests/core/test_actions_api.py rename to tests/unit/core/test_actions_api.py index 9683860f87ba..f4a8ad555ac2 100644 --- a/tests/core/test_actions_api.py +++ b/tests/unit/core/test_actions_api.py @@ -17,28 +17,34 @@ # ============================================================================= import os +from unittest import TestCase -import nemo -from tests.common_setup import NeMoUnitTest +import pytest +from nemo.backends.pytorch.actions import PtActions +from nemo.backends.pytorch.common import SequenceEmbedding -class TestTrainers(NeMoUnitTest): + +@pytest.mark.usefixtures("neural_factory") +class TestTrainers(TestCase): + @pytest.mark.unit def test_checkpointing(self): path = 'optimizer.pt' - optimizer = nemo.backends.pytorch.actions.PtActions() + optimizer = PtActions() optimizer.save_state_to(path) optimizer.step = 123 - optimizer.epoch_num = 324 + optimizer.epoch = 324 optimizer.restore_state_from(path) self.assertEqual(optimizer.step, 0) - self.assertEqual(optimizer.epoch_num, 0) + self.assertEqual(optimizer.epoch, 0) self.assertEqual(len(optimizer.optimizers), 0) os.remove(path) + @pytest.mark.unit def test_multi_optimizer(self): path = 'optimizer.pt' - module = nemo.backends.pytorch.common.SequenceEmbedding(voc_size=8, hidden_size=16) - optimizer = nemo.backends.pytorch.actions.PtActions() + module = SequenceEmbedding(voc_size=8, hidden_size=16) + optimizer = PtActions() optimizer.create_optimizer("sgd", module, optimizer_params={"lr": 1.0}) optimizer.create_optimizer("sgd", [module], optimizer_params={"lr": 2.0}) optimizer.create_optimizer("novograd", [module], optimizer_params={"lr": 3.0}) @@ -47,7 +53,7 @@ def test_multi_optimizer(self): self.assertEqual(len(optimizer.optimizers), 5) optimizer.save_state_to(path) optimizer.step = 123 - optimizer.epoch_num = 324 + optimizer.epoch = 324 for i, opt in enumerate(optimizer.optimizers): for param_group in opt.param_groups: self.assertEqual(param_group['lr'], float(i + 1)) @@ -57,6 +63,6 @@ def test_multi_optimizer(self): for param_group in opt.param_groups: self.assertEqual(param_group['lr'], float(i + 1)) self.assertEqual(optimizer.step, 0) - self.assertEqual(optimizer.epoch_num, 0) + self.assertEqual(optimizer.epoch, 0) self.assertEqual(len(optimizer.optimizers), 5) os.remove(path) diff --git a/tests/unit/core/test_deploy_export.py b/tests/unit/core/test_deploy_export.py new file mode 100644 index 000000000000..9f4f29ff3e3f --- /dev/null +++ b/tests/unit/core/test_deploy_export.py @@ -0,0 +1,367 @@ +# ! /usr/bin/python +# -*- coding: utf-8 -*- + +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================= + +import copy +import os +from inspect import signature +from collections import OrderedDict +from pathlib import Path +import urllib.request +import numpy as np + +# git clone git@github.com:microsoft/onnxruntime.git +# cd onnxruntime +# +# ./build.sh --update --build --config RelWithDebInfo --build_shared_lib --parallel \ +# --cudnn_home /usr/lib/x86_64-linux-gnu --cuda_home /usr/local/cuda \ +# --tensorrt_home .../TensorRT --use_tensorrt --enable_pybind --build_wheel +# +# pip install --upgrade ./build/Linux/RelWithDebInfo/dist/*.whl +import onnxruntime as ort +import pytest +import torch + +import nemo +import nemo.collections.nlp as nemo_nlp +import nemo.collections.nlp.nm.trainables.common.token_classification_nm +import nemo.collections.tts as nemo_tts + +from nemo import logging +from nemo.core import DeploymentFormat as DF +from nemo.core import NeuralModule + +# Check if the required libraries and runtimes are installed. +# Only initialize GPU after this runner is activated. +__trt_pycuda_req_satisfied = True +try: + import pycuda.autoinit + + # This import causes pycuda to automatically manage CUDA context creation and cleanup. + import pycuda.driver as cuda + + from .tensorrt_loaders import ( + DefaultDataLoader, + DataLoaderCache, + OnnxFileLoader, + OnnxNetworkLoader, + BuildEngineLoader, + ) + from .tensorrt_runner import TensorRTRunnerV2 +except Exception as e: + nemo.logging.error('Failed to import: `{}` ({})'.format(str(e), type(e))) + __trt_pycuda_req_satisfied = False + +# create decorator so that tests can be marked with the TRT requirement +requires_trt = pytest.mark.skipif( + not __trt_pycuda_req_satisfied, reason="TensorRT/PyCuda library required to run test" +) + + +@pytest.mark.usefixtures("neural_factory") +class TestDeployExport: + @torch.no_grad() + def __test_export_route(self, module, out_name, mode, input_example=None): + # select correct extension based on the output format + ext = {DF.ONNX: ".onnx", DF.TRTONNX: ".trt.onnx", DF.PYTORCH: ".pt", DF.TORCHSCRIPT: ".ts"}.get(mode, ".onnx") + out = Path(f"{out_name}{ext}") + out_name = str(out) + + if out.exists(): + os.remove(out) + + module.eval() + torch.manual_seed(1) + deploy_input_example = input_example + if isinstance(input_example, OrderedDict): + deploy_input_example = tuple(input_example.values()) + if len(deploy_input_example) == 1: + deploy_input_example = deploy_input_example[0] + elif isinstance(input_example, tuple): + deploy_input_example = input_example if len(input_example) > 1 else input_example[0] + + sig = signature(module.forward) + pnum = len(sig.parameters) + outputs_fwd = module.forward(*deploy_input_example) if pnum > 2 else module.forward(deploy_input_example) + self.nf.deployment_export( + module=module, output=out_name, input_example=deploy_input_example, d_format=mode, output_example=None, + ) + + assert out.exists() == True + + if mode == DF.TRTONNX: + + data_loader = DefaultDataLoader() + loader_cache = DataLoaderCache(data_loader) + profile_shapes = OrderedDict() + names = list(module.input_ports) + list(module.output_ports) + names = list( + filter( + lambda x: x + not in (module._disabled_deployment_input_ports | module._disabled_deployment_output_ports), + names, + ) + ) + if isinstance(input_example, tuple): + si = [tuple(input_example[i].shape) for i in range(len(input_example))] + elif isinstance(input_example, OrderedDict): + si = [tuple(input_example.values())[i].shape for i in 
range(len(input_example))] + else: + si = [tuple(input_example.shape)] + if isinstance(outputs_fwd, tuple): + fi = [tuple(outputs_fwd[i].shape) for i in range(len(outputs_fwd))] + else: + fi = [tuple(outputs_fwd.shape)] + si = si + fi + i = 0 + for name in names: + profile_shapes[name] = [si[i]] * 3 + i = i + 1 + + onnx_loader = OnnxFileLoader(out_name) + network_loader = OnnxNetworkLoader(onnx_loader, explicit_precision=False) + model_loader = BuildEngineLoader( + network_loader, + max_workspace_size=1 << 30, + fp16_mode=False, + int8_mode=False, + profile_shapes=profile_shapes, + write_engine=None, + calibrator=None, + layerwise=False, + ) + + with TensorRTRunnerV2(model_loader=model_loader) as active_runner: + input_metadata = active_runner.get_input_metadata() + if input_metadata is None: + logging.critical("For {:}, get_input_metadata() returned None!".format(active_runner.name)) + logging.debug("Runner Inputs: {:}".format(input_metadata)) + feed_dict = loader_cache.load(iteration=0, input_metadata=input_metadata, input_example=input_example) + inputs = dict() + input_names = list(input_metadata.keys()) + for i in range(len(input_names)): + input_name = input_names[i] + if input_name in module._disabled_deployment_input_ports: + continue + + if isinstance(input_example, OrderedDict): + for key in input_example.keys(): + if key in input_name: + inputs[input_name] = input_example[key].cpu().numpy() + elif isinstance(input_example, tuple): + inputs[input_name] = input_example[i].cpu().numpy() + else: + inputs[input_name] = input_example.cpu().numpy() + + out_dict = active_runner.infer(feed_dict=feed_dict, output=outputs_fwd) + for ov in out_dict.values(): + outputs_scr = torch.from_numpy(ov).cuda() + break + + outputs = [] + outputs.append(copy.deepcopy(out_dict)) + logging.debug( + "Received outputs: {:}".format( + ["{:}: {:}".format(name, out.shape) for name, out in out_dict.items()] + ) + ) + logging.info("Output Buffers: {:}".format(outputs)) + + inpex = [] + for ie in feed_dict.values(): # loader_cache.cache[0].values(): + if ie.dtype.type is np.int32: + inpex.append(torch.from_numpy(ie).long().cuda()) + else: + inpex.append(torch.from_numpy(ie).cuda()) + if len(inpex) == len(input_example): + break + inpex = tuple(inpex) + outputs_fwd = module.forward(*inpex) + + elif mode == DF.ONNX: + # Must recompute because *module* might be different now + torch.manual_seed(1) + outputs_fwd = module.forward(*deploy_input_example) if pnum > 2 else module.forward(deploy_input_example) + + sess_options = ort.SessionOptions() + sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_BASIC + ort_session = ort.InferenceSession(out_name, sess_options, ['CUDAExecutionProvider']) + print('Execution Providers: ', ort_session.get_providers()) + inputs = dict() + input_names = ( + list(input_example.keys()) + if isinstance(input_example, OrderedDict) + else list(module.input_ports.keys()) + ) + ort_inputs = ort_session.get_inputs() + + for node_arg in ort_inputs: + ort_name = node_arg.name + for input_name in input_names: + if input_name in ort_name or ort_name in input_name: + break + if ort_name not in inputs: + inputs[ort_name] = ( + input_example[input_name].cpu().numpy() + if isinstance(input_example, OrderedDict) + else input_example.cpu().numpy() + ) + + output_names = None + outputs_scr = ort_session.run(output_names, inputs) + outputs_scr = torch.from_numpy(outputs_scr[0]).cuda() + elif mode == DF.TORCHSCRIPT: + tscr = torch.jit.load(out_name) + torch.manual_seed(1) + 
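+            # Same seed as the eager-mode forward above, so any random ops inside the scripted module
+            # produce comparable outputs.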
outputs_scr = ( + tscr.forward(*tuple(input_example.values())) + if isinstance(input_example, OrderedDict) + else ( + tscr.forward(*input_example) if isinstance(input_example, tuple) else tscr.forward(input_example) + ) + ) + elif mode == DF.PYTORCH: + module.restore_from(out_name) + torch.manual_seed(1) + if isinstance(input_example, OrderedDict): + outputs_scr = module.forward(*tuple(input_example.values())) + elif isinstance(input_example, tuple) or isinstance(input_example, list): + outputs_scr = module.forward(*input_example) + else: + outputs_scr = module.forward(input_example) + + outputs_scr = ( + outputs_scr[0] if isinstance(outputs_scr, tuple) or isinstance(outputs_scr, list) else outputs_scr + ) + outputs_fwd = ( + outputs_fwd[0] if isinstance(outputs_fwd, tuple) or isinstance(outputs_fwd, list) else outputs_fwd + ) + + n = outputs_fwd.numel() + tol = 5.0e-3 if n < 10000 else (5.0e-2 if n < 100000 else (5.0e-1)) + + assert (outputs_scr - outputs_fwd).norm(p=2) < tol + + if out.exists(): + os.remove(out) + + @pytest.mark.unit + @pytest.mark.run_only_on('GPU') + @pytest.mark.parametrize( + "input_example, module_name, df_type", + [ + # TaylorNet export tests. + (torch.randn(4, 1), "TaylorNet", DF.PYTORCH), + # TokenClassifier export tests. + (torch.randn(16, 16, 512), "TokenClassifier", DF.ONNX), + (torch.randn(16, 16, 512), "TokenClassifier", DF.TORCHSCRIPT), + (torch.randn(16, 16, 512), "TokenClassifier", DF.PYTORCH), + pytest.param(torch.randn(16, 16, 512), "TokenClassifier", DF.TRTONNX, marks=requires_trt), + # JasperDecoderForCTC export tests. + (torch.randn(34, 1024, 1), "JasperDecoderForCTC", DF.ONNX), + (torch.randn(34, 1024, 1), "JasperDecoderForCTC", DF.TORCHSCRIPT), + (torch.randn(34, 1024, 1), "JasperDecoderForCTC", DF.PYTORCH), + pytest.param(torch.randn(34, 1024, 1), "JasperDecoderForCTC", DF.TRTONNX, marks=requires_trt), + # JasperEncoder export tests. + (torch.randn(16, 64, 256), "JasperEncoder", DF.ONNX), + (torch.randn(16, 64, 256), "JasperEncoder", DF.TORCHSCRIPT), + (torch.randn(16, 64, 256), "JasperEncoder", DF.PYTORCH), + pytest.param(torch.randn(16, 64, 256), "JasperEncoder", DF.TRTONNX, marks=requires_trt), + # QuartznetEncoder export tests. + (torch.randn(16, 64, 256), "QuartznetEncoder", DF.ONNX), + (torch.randn(16, 64, 256), "QuartznetEncoder", DF.TORCHSCRIPT), + (torch.randn(16, 64, 256), "QuartznetEncoder", DF.PYTORCH), + pytest.param(torch.randn(16, 64, 256), "QuartznetEncoder", DF.TRTONNX, marks=requires_trt), + ], + ) + def test_module_export(self, tmpdir, input_example, module_name, df_type): + """ Tests the module export. + + Args: + tmpdir: Fixture which will provide a temporary directory. + + input_example: Input to be passed to TaylorNet. + + module_name: Name of the module (section in config file). + + df_type: Parameter denoting type of export to be tested. + """ + # Create neural module instance. + module = NeuralModule.import_from_config("tests/configs/test_deploy_export.yaml", module_name) + # Generate filename in the temporary directory. + tmp_file_name = str(tmpdir.mkdir("export").join(module_name)) + input_example = input_example.cuda() if input_example is not None else input_example + # Test export. + self.__test_export_route( + module=module, out_name=tmp_file_name, mode=df_type, input_example=input_example, + ) + + @pytest.mark.unit + @pytest.mark.run_only_on('GPU') + @pytest.mark.parametrize("df_type", [DF.ONNX, DF.TORCHSCRIPT, DF.PYTORCH]) + def test_hf_bert(self, tmpdir, df_type): + """ Tests BERT export. 
+ + Args: + tmpdir: Fixture which will provide a temporary directory. + + df_type: Parameter denoting type of export to be tested. + """ + bert = nemo.collections.nlp.nm.trainables.common.huggingface.BERT(pretrained_model_name="bert-base-uncased") + input_example = OrderedDict( + [ + ("input_ids", torch.randint(low=0, high=16, size=(2, 16)).cuda()), + ("token_type_ids", torch.randint(low=0, high=2, size=(2, 16)).cuda()), + ("attention_mask", torch.randint(low=0, high=2, size=(2, 16)).cuda()), + ] + ) + # Generate filename in the temporary directory. + tmp_file_name = str(tmpdir.mkdir("export").join("bert")) + # Test export. + self.__test_export_route(module=bert, out_name=tmp_file_name, mode=df_type, input_example=input_example) + + @pytest.mark.unit + @pytest.mark.run_only_on('GPU') + @pytest.mark.parametrize("df_type", [DF.TORCHSCRIPT, DF.PYTORCH]) + # + # TODO WaveGlow.infer uses torch.randn which is required to be seeded + # for deterministic results. It gets translated to ONNX op like this: + # + # %16020 = RandomNormalLike[dtype = 1](%16019) + # + # There is no way to seed it, thus to validate ONNX test flow + # please use torch.ones + # + # @pytest.mark.parametrize("df_type", [DF.ONNX, DF.TORCHSCRIPT, DF.PYTORCH]) + # + def test_waveglow(self, tmpdir, df_type): + url = "https://api.ngc.nvidia.com/v2/models/nvidia/waveglow_ljspeech/versions/2/files/WaveGlowNM.pt" + ptfile = "./WaveGlowNM.pt" + if not Path(ptfile).is_file(): + urllib.request.urlretrieve(url, ptfile) + + module = nemo_tts.WaveGlowInferNM(sample_rate=22050) + module.restore_from(ptfile) + module.eval() + + torch.manual_seed(1) + mel = torch.randn(1, 80, 96).cuda() + + input_example = OrderedDict([("mel_spectrogram", mel)]) + tmp_file_name = str(tmpdir.mkdir("export").join("waveglow")) + + self.__test_export_route(module=module, out_name=tmp_file_name, mode=df_type, input_example=input_example) diff --git a/tests/unit/core/test_model.py b/tests/unit/core/test_model.py new file mode 100644 index 000000000000..4c3333ed9d31 --- /dev/null +++ b/tests/unit/core/test_model.py @@ -0,0 +1,60 @@ +# ! /usr/bin/python +# -*- coding: utf-8 -*- + +# Copyright 2019 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================= + +import os + +import pytest +from ruamel.yaml import YAML + +from nemo.collections.asr.models import ASRConvCTCModel + + +@pytest.mark.usefixtures("neural_factory") +class TestNeMoModels: + @pytest.mark.unit + def test_quartznet_creation(self): + yaml = YAML(typ="safe") + with open( + os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../examples/asr/configs/jasper_an4.yaml")) + ) as file: + model_definition = yaml.load(file) + model = ASRConvCTCModel( + preprocessor_params=model_definition['AudioToMelSpectrogramPreprocessor'], + encoder_params=model_definition['JasperEncoder'], + decoder_params=model_definition['JasperDecoderForCTC'], + ) + assert model.num_weights > 0 + assert len(model.modules) == 3 + + @pytest.mark.unit + def test_quartznet_nemo_file_export_and_import(self, tmpdir): + yaml = YAML(typ="safe") + with open( + os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../examples/asr/configs/jasper_an4.yaml")) + ) as file: + model_definition = yaml.load(file) + model = ASRConvCTCModel( + preprocessor_params=model_definition['AudioToMelSpectrogramPreprocessor'], + encoder_params=model_definition['JasperEncoder'], + decoder_params=model_definition['JasperDecoderForCTC'], + ) + nemo_file = str(tmpdir.mkdir("tmp_export_import").join("deleteme.nemo")) + model.save_to(nemo_file) + assert os.path.exists(nemo_file) + new_qn = ASRConvCTCModel.from_pretrained(model_info=nemo_file) + assert model.num_weights == new_qn.num_weights diff --git a/tests/unit/core/test_nemo_callbacks.py b/tests/unit/core/test_nemo_callbacks.py new file mode 100755 index 000000000000..0c5edea77801 --- /dev/null +++ b/tests/unit/core/test_nemo_callbacks.py @@ -0,0 +1,243 @@ +# ! /usr/bin/python +# -*- coding: utf-8 -*- + +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +import os +import shutil +from io import StringIO + +import pytest +from tensorboard.backend.event_processing import event_file_inspector as efi +from torch.utils.tensorboard import SummaryWriter + +from nemo.backends.pytorch.nm import NonTrainableNM +from nemo.backends.pytorch.tutorials import MSELoss, RealFunctionDataLayer, TaylorNet +from nemo.core.callbacks import * +from nemo.core.neural_types import ChannelType, NeuralType +from nemo.utils import logging + + +@pytest.mark.usefixtures("neural_factory") +class TestNeMoCallbacks: + @pytest.fixture() + def clean_up(self): + yield + self.nf.reset_trainer() + + @pytest.mark.unit + def test_SimpleLogger(self, clean_up): + data_source = RealFunctionDataLayer(n=100, batch_size=1) + trainable_module = TaylorNet(dim=4) + loss = MSELoss() + + # Create the graph by connnecting the modules. + x, y = data_source() + y_pred = trainable_module(x=x) + loss_tensor = loss(predictions=y_pred, target=y) + + # Mock up both std and stderr streams. 
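+        # The patched handler captures logger output so the SimpleLogger loss lines can be counted
+        # below (one line per step is expected with step_freq=1).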
+ with logging.patch_stdout_handler(StringIO()) as std_out: + self.nf.train( + tensors_to_optimize=[loss_tensor], + callbacks=[SimpleLogger(step_freq=1)], + optimization_params={"max_steps": 4, "lr": 0.01}, + optimizer="sgd", + ) + + output_lines = std_out.getvalue().splitlines() + assert len(output_lines) == 4 + for line in output_lines: + assert "loss" in line + + @pytest.mark.unit + def test_rename_and_log(self, clean_up): + data_source = RealFunctionDataLayer(n=100, batch_size=1) + trainable_module = TaylorNet(dim=4) + loss = MSELoss() + + # Create the graph by connnecting the modules. + x, y = data_source() + y_pred = trainable_module(x=x) + loss_tensor = loss(predictions=y_pred, target=y) + + class DummyNM(NonTrainableNM): + def __init__(self): + super().__init__() + + @property + def input_ports(self): + """Returns definitions of module input ports. + + Returns: + A (dict) of module's input ports names to NeuralTypes mapping + """ + return {"x": NeuralType(('B', 'D'), ChannelType())} + + @property + def output_ports(self): + """Returns definitions of module output ports. + + Returns: + A (dict) of module's output ports names to NeuralTypes mapping + """ + return {"y_pred": NeuralType(('B', 'D'), ChannelType())} + + def forward(self, x): + return x + 1 + + test = DummyNM() + extra_tensor = test(x=y_pred) + + y_pred.rename("y_pred") + assert y_pred.name == "y_pred" + + # Mock up both std and stderr streams. + with logging.patch_stdout_handler(StringIO()) as std_out: + self.nf.train( + tensors_to_optimize=[loss_tensor], + callbacks=[SimpleLogger(step_freq=1, tensors_to_log=['y_pred', extra_tensor])], + optimization_params={"max_steps": 4, "lr": 0.01}, + optimizer="sgd", + ) + + output_lines = std_out.getvalue().splitlines() + assert len(output_lines) == 8 + for i, line in enumerate(output_lines): + if i % 2 == 0: + assert y_pred.name in line + else: + assert extra_tensor.name in line + + @pytest.mark.unit + def test_TensorboardLogger(self, clean_up, tmpdir): + data_source = RealFunctionDataLayer(n=100, batch_size=1) + trainable_module = TaylorNet(dim=4) + loss = MSELoss() + + # Create the graph by connnecting the modules. + x, y = data_source() + y_pred = trainable_module(x=x) + loss_tensor = loss(predictions=y_pred, target=y) + + logging_dir = tmpdir.mkdir("temp") + + writer = SummaryWriter(logging_dir) + + tb_logger = TensorboardLogger(writer, step_freq=1) + callbacks = [tb_logger] + + self.nf.train( + tensors_to_optimize=[loss_tensor], + callbacks=callbacks, + optimization_params={"max_steps": 4, "lr": 0.01}, + optimizer="sgd", + ) + + # efi.inspect("temp", tag="loss") + inspection_units = efi.get_inspection_units(str(logging_dir), "", "loss") + + # Make sure there is only 1 tensorboard file + assert len(inspection_units) == 1 + + # Assert that there the loss scalars has been logged 4 times + assert len(inspection_units[0].field_to_obs['scalars']) == 4 + + @pytest.mark.unit + def test_epoch_decorators(self, clean_up): + data_source = RealFunctionDataLayer(n=24, batch_size=12) + trainable_module = TaylorNet(dim=4) + loss = MSELoss() + + # Create the graph by connnecting the modules. 
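+        # Note: n=24 with batch_size=12 gives 2 batches per epoch, so max_steps=4 below covers 2 epochs.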
+ x, y = data_source() + y_pred = trainable_module(x=x) + loss_tensor = loss(predictions=y_pred, target=y) + + epoch_start_counter = [0] + epoch_end_counter = [0] + + @on_epoch_start + def count_epoch_starts(state, counter=epoch_start_counter): + counter[0] += 1 + + @on_epoch_end + def count_epoch_ends(state, counter=epoch_end_counter): + counter[0] -= 1 + + callbacks = [count_epoch_starts, count_epoch_ends] + + self.nf.train( + tensors_to_optimize=[loss_tensor], + callbacks=callbacks, + optimization_params={"max_steps": 4, "lr": 0.01}, + optimizer="sgd", + ) + + assert epoch_start_counter[0] == 2 + assert epoch_end_counter[0] == -2 + + @pytest.mark.unit + def test_step_batch_decorators(self, clean_up): + """Showcase the difference between step and batch""" + data_source = RealFunctionDataLayer(n=24, batch_size=12) + trainable_module = TaylorNet(dim=4) + loss = MSELoss() + + # Create the graph by connnecting the modules. + x, y = data_source() + y_pred = trainable_module(x=x) + loss_tensor = loss(predictions=y_pred, target=y) + + epoch_step_counter = [0] + epoch_batch_counter = [0] + + @on_step_end + def count_steps(state, counter=epoch_step_counter): + counter[0] += 1 + + @on_batch_end + def count_batches(state, counter=epoch_batch_counter): + counter[0] += 1 + + callbacks = [count_steps, count_batches] + + self.nf.train( + tensors_to_optimize=[loss_tensor], + callbacks=callbacks, + optimization_params={"max_steps": 4, "lr": 0.01}, + optimizer="sgd", + ) + + # when grad accumlation steps (aka iter_per_step or batches_per_step) = 1, num_steps == num_batches + assert epoch_step_counter[0] == 4 + assert epoch_batch_counter[0] == 4 + + epoch_step_counter[0] = 0 + epoch_batch_counter[0] = 0 + + self.nf.train( + tensors_to_optimize=[loss_tensor], + callbacks=callbacks, + optimization_params={"max_steps": 4, "lr": 0.01}, + optimizer="sgd", + reset=True, + batches_per_step=2, + ) + + # when grad accumlation steps != 1, num_steps != num_batches + assert epoch_step_counter[0] == 4 + assert epoch_batch_counter[0] == 8 diff --git a/tests/core/test_neural_types.py b/tests/unit/core/test_neural_types.py similarity index 82% rename from tests/core/test_neural_types.py rename to tests/unit/core/test_neural_types.py index 133e747db3fe..67ee8567fc12 100644 --- a/tests/core/test_neural_types.py +++ b/tests/unit/core/test_neural_types.py @@ -15,13 +15,20 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================= -import nemo + +from unittest import TestCase + +import pytest + +from nemo.backends.pytorch.tutorials import MSELoss, RealFunctionDataLayer, TaylorNet from nemo.core.neural_types import ( AcousticEncodedRepresentation, AudioSignal, AxisKind, + AxisKindAbstract, AxisType, ChannelType, + ElementType, MelSpectrogramType, MFCCSpectrogramType, NeuralPortNmTensorMismatchError, @@ -30,10 +37,10 @@ SpectrogramType, VoidType, ) -from tests.common_setup import NeMoUnitTest -class NeuralTypeSystemTests(NeMoUnitTest): +class NeuralTypeSystemTests(TestCase): + @pytest.mark.unit def test_short_vs_long_version(self): long_version = NeuralType( axes=(AxisType(AxisKind.Batch, None), AxisType(AxisKind.Dimension, None), AxisType(AxisKind.Time, None)), @@ -43,6 +50,7 @@ def test_short_vs_long_version(self): self.assertEqual(long_version.compare(short_version), NeuralTypeComparisonResult.SAME) self.assertEqual(short_version.compare(long_version), NeuralTypeComparisonResult.SAME) + @pytest.mark.unit def test_parameterized_type_audio_sampling_frequency(self): audio16K = NeuralType(axes=('B', 'T'), elements_type=AudioSignal(16000)) audio8K = NeuralType(axes=('B', 'T'), elements_type=AudioSignal(8000)) @@ -53,17 +61,20 @@ def test_parameterized_type_audio_sampling_frequency(self): self.assertEqual(another16K.compare(audio16K), NeuralTypeComparisonResult.SAME) self.assertEqual(audio16K.compare(another16K), NeuralTypeComparisonResult.SAME) + @pytest.mark.unit def test_transpose_same_1(self): type1 = NeuralType(axes=('B', 'T', 'C')) type2 = NeuralType(axes=('T', 'B', 'C')) self.assertEqual(type1.compare(type2), NeuralTypeComparisonResult.TRANSPOSE_SAME) self.assertEqual(type2.compare(type1), NeuralTypeComparisonResult.TRANSPOSE_SAME) + @pytest.mark.unit def test_transpose_same_2(self): audio16K = NeuralType(axes=('B', 'T'), elements_type=AudioSignal(16000)) audio16K_t = NeuralType(axes=('T', 'B'), elements_type=AudioSignal(16000)) self.assertEqual(audio16K.compare(audio16K_t), NeuralTypeComparisonResult.TRANSPOSE_SAME) + @pytest.mark.unit def test_inheritance_spec_augment_example(self): input = NeuralType(('B', 'D', 'T'), SpectrogramType()) out1 = NeuralType(('B', 'D', 'T'), MelSpectrogramType()) @@ -75,12 +86,14 @@ def test_inheritance_spec_augment_example(self): self.assertEqual(out1.compare(input), NeuralTypeComparisonResult.LESS) self.assertEqual(out2.compare(input), NeuralTypeComparisonResult.LESS) + @pytest.mark.unit def test_singletone(self): loss_output1 = NeuralType(axes=None) loss_output2 = NeuralType(axes=None) self.assertEqual(loss_output1.compare(loss_output2), NeuralTypeComparisonResult.SAME) self.assertEqual(loss_output2.compare(loss_output1), NeuralTypeComparisonResult.SAME) + @pytest.mark.unit def test_list_of_lists(self): T1 = NeuralType( axes=( @@ -105,6 +118,7 @@ def test_list_of_lists(self): # TODO: should this be incompatible instead??? 
self.assertEqual(T1.compare(T2), NeuralTypeComparisonResult.TRANSPOSE_SAME) + @pytest.mark.unit def test_void(self): btc_spctr = NeuralType(('B', 'T', 'C'), SpectrogramType()) btc_spct_bad = NeuralType(('B', 'T'), SpectrogramType()) @@ -113,6 +127,7 @@ def test_void(self): self.assertEqual(btc_spctr.compare(btc_void), NeuralTypeComparisonResult.INCOMPATIBLE) self.assertEqual(btc_void.compare(btc_spct_bad), NeuralTypeComparisonResult.INCOMPATIBLE) + @pytest.mark.unit def test_big_void(self): big_void_1 = NeuralType(elements_type=VoidType()) big_void_2 = NeuralType() @@ -150,24 +165,26 @@ def test_big_void(self): self.assertEqual(big_void_2.compare(t1), NeuralTypeComparisonResult.SAME) self.assertEqual(big_void_2.compare(t2), NeuralTypeComparisonResult.SAME) + @pytest.mark.unit def test_dag(self): - data_source = nemo.backends.pytorch.tutorials.RealFunctionDataLayer(n=10000, batch_size=128) - trainable_module = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) - loss = nemo.backends.pytorch.tutorials.MSELoss() + data_source = RealFunctionDataLayer(n=10000, batch_size=128) + trainable_module = TaylorNet(dim=4) + loss = MSELoss() x, y = data_source() y_pred = trainable_module(x=x) _ = loss(predictions=y_pred, target=y) def wrong(): - data_source = nemo.backends.pytorch.tutorials.RealFunctionDataLayer(n=10000, batch_size=128) - trainable_module = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) - loss = nemo.backends.pytorch.tutorials.MSELoss() + data_source = RealFunctionDataLayer(n=10000, batch_size=128) + trainable_module = TaylorNet(dim=4) + loss = MSELoss() x, y = data_source() loss_tensor = loss(predictions=x, target=x) _ = trainable_module(x=loss_tensor) self.assertRaises(NeuralPortNmTensorMismatchError, wrong) + @pytest.mark.unit def test_unspecified_dimensions(self): t0 = NeuralType( (AxisType(AxisKind.Batch, 64), AxisType(AxisKind.Time, 10), AxisType(AxisKind.Dimension, 128)), @@ -177,6 +194,7 @@ def test_unspecified_dimensions(self): self.assertEqual(t1.compare(t0), NeuralTypeComparisonResult.SAME) self.assertEqual(t0.compare(t1), NeuralTypeComparisonResult.DIM_INCOMPATIBLE) + @pytest.mark.unit def test_any_axis(self): t0 = NeuralType(('B', 'Any', 'Any'), VoidType()) t1 = NeuralType(('B', 'Any', 'Any'), SpectrogramType()) @@ -186,3 +204,40 @@ def test_any_axis(self): self.assertEqual(t1.compare(t2), NeuralTypeComparisonResult.SAME) self.assertEqual(t2.compare(t1), NeuralTypeComparisonResult.INCOMPATIBLE) self.assertEqual(t1.compare(t0), NeuralTypeComparisonResult.INCOMPATIBLE) + + @pytest.mark.unit + def test_struct(self): + class BoundingBox(ElementType): + def __str__(self): + return "bounding box from detection model" + + def fields(self): + return ("X", "Y", "W", "H") + + # ALSO ADD new, user-defined, axis kind + class AxisKind2(AxisKindAbstract): + Image = 0 + + T1 = NeuralType( + elements_type=BoundingBox(), + axes=( + AxisType(kind=AxisKind.Batch, size=None, is_list=True), + AxisType(kind=AxisKind2.Image, size=None, is_list=True), + ), + ) + + class BadBoundingBox(ElementType): + def __str__(self): + return "bad bounding box from detection model" + + def fields(self): + return ("X", "Y", "H") + + T2 = NeuralType( + elements_type=BadBoundingBox(), + axes=( + AxisType(kind=AxisKind.Batch, size=None, is_list=True), + AxisType(kind=AxisKind2.Image, size=None, is_list=True), + ), + ) + self.assertEqual(T2.compare(T1), NeuralTypeComparisonResult.INCOMPATIBLE) diff --git a/tests/unit/core/test_nm_tensor.py b/tests/unit/core/test_nm_tensor.py new file mode 100644 index 
000000000000..647905fa51c2 --- /dev/null +++ b/tests/unit/core/test_nm_tensor.py @@ -0,0 +1,133 @@ +# ! /usr/bin/python +# -*- coding: utf-8 -*- +# ============================================================================= +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +import pytest + +from nemo.backends.pytorch.tutorials import MSELoss, RealFunctionDataLayer, TaylorNet +from nemo.core.neural_types import NeuralTypeComparisonResult + + +@pytest.mark.usefixtures("neural_factory") +class TestNmTensor: + @pytest.mark.unit + def test_nm_tensors_producer_args(self): + """ + Tests whether nmTensors are correct - producers and their args. + """ + # Create modules. + data_source = RealFunctionDataLayer(n=100, batch_size=1) + trainable_module = TaylorNet(dim=4) + loss = MSELoss() + + # Create the graph by connnecting the modules. + x, y = data_source() + y_pred = trainable_module(x=x) + loss_tensor = loss(predictions=y_pred, target=y) + + # check producers' bookkeeping + assert loss_tensor.producer_name == loss.name + assert loss_tensor.producer_args == {"predictions": y_pred, "target": y} + assert y_pred.producer_name == trainable_module.name + assert y_pred.producer_args == {"x": x} + assert y.producer_name == data_source.name + assert y.producer_args == {} + assert x.producer_name == data_source.name + assert x.producer_args == {} + + @pytest.mark.unit + def test_simple_train_named_output(self): + """ Test named output """ + data_source = RealFunctionDataLayer(n=10, batch_size=1) + # Get data + data = data_source() + + # Check output class naming coherence. + assert type(data).__name__ == 'RealFunctionDataLayerOutput' + + # Check types. + assert data.x.compare(data_source.output_ports["x"]) == NeuralTypeComparisonResult.SAME + assert data.y.compare(data_source.output_ports["y"]) == NeuralTypeComparisonResult.SAME + + @pytest.mark.unit + def test_nm_tensors_producer_consumers(self): + """ + Tests whether nmTensors are correct - checking producers and consumers. + """ + # Create modules. + data_source = RealFunctionDataLayer(n=10, batch_size=1, name="source") + trainable_module = TaylorNet(dim=4, name="tm") + loss = MSELoss(name="loss") + loss2 = MSELoss(name="loss2") + + # Create the graph by connnecting the modules. + x, y = data_source() + y_pred = trainable_module(x=x) + lss = loss(predictions=y_pred, target=y) + lss2 = loss2(predictions=y_pred, target=y) + + # Check tensor x producer and consumers. + p = x.producer_step_module_port + cs = x.consumers + assert p.module_name == "source" + assert p.port_name == "x" + assert len(cs) == 1 + assert cs[0].module_name == "tm" + assert cs[0].port_name == "x" + + # Check tensor y producer and consumers. 
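+        # y feeds the `target` port of both loss modules, so it should report exactly two consumers.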
+ p = y.producer_step_module_port + cs = y.consumers + assert p.module_name == "source" + assert p.port_name == "y" + assert len(cs) == 2 + assert cs[0].module_name == "loss" + assert cs[0].port_name == "target" + assert cs[1].module_name == "loss2" + assert cs[1].port_name == "target" + + # Check tensor y_pred producer and consumers. + p = y_pred.producer_step_module_port + cs = y_pred.consumers + assert p.module_name == "tm" + assert p.port_name == "y_pred" + assert len(cs) == 2 + assert cs[0].module_name == "loss" + assert cs[0].port_name == "predictions" + assert cs[1].module_name == "loss2" + assert cs[1].port_name == "predictions" + + @pytest.mark.unit + def test_nm_tensors_types(self): + """ + Tests whether nmTensors are correct - checking type property. + """ + # Create modules. + data_source = RealFunctionDataLayer(n=10, batch_size=1) + trainable_module = TaylorNet(dim=4) + loss = MSELoss() + + # Create the graph by connnecting the modules. + x, y = data_source() + y_pred = trainable_module(x=x) + lss = loss(predictions=y_pred, target=y) + + # Check types. + assert x.ntype.compare(data_source.output_ports["x"]) == NeuralTypeComparisonResult.SAME + assert y.ntype.compare(data_source.output_ports["y"]) == NeuralTypeComparisonResult.SAME + assert y_pred.ntype.compare(trainable_module.output_ports["y_pred"]) == NeuralTypeComparisonResult.SAME + assert lss.ntype.compare(loss.output_ports["loss"]) == NeuralTypeComparisonResult.SAME diff --git a/tests/core/test_policies.py b/tests/unit/core/test_policies.py similarity index 61% rename from tests/core/test_policies.py rename to tests/unit/core/test_policies.py index 27a8748c7845..b75dfbd88c9a 100644 --- a/tests/core/test_policies.py +++ b/tests/unit/core/test_policies.py @@ -16,11 +16,21 @@ # limitations under the License. 
# ============================================================================= -from nemo.utils.lr_policies import CosineAnnealing, SquareAnnealing, WarmupAnnealing -from tests.common_setup import NeMoUnitTest +from unittest import TestCase +import pytest -class TestPolicies(NeMoUnitTest): +from nemo.utils.lr_policies import ( + CosineAnnealing, + PolynomialDecayAnnealing, + PolynomialHoldDecayAnnealing, + SquareAnnealing, + WarmupAnnealing, +) + + +class TestPolicies(TestCase): + @pytest.mark.unit def test_square(self): policy = SquareAnnealing(100) lr1, lr2, lr3 = (policy(1e-3, x, 0) for x in (0, 10, 20)) @@ -28,17 +38,34 @@ def test_square(self): self.assertTrue(lr2 >= lr3) self.assertTrue(lr1 - lr2 >= lr2 - lr3) + @pytest.mark.unit def test_working(self): total_steps = 1000 - lr_policy_cls = [SquareAnnealing, CosineAnnealing, WarmupAnnealing] + lr_policy_cls = [ + SquareAnnealing, + CosineAnnealing, + WarmupAnnealing, + PolynomialDecayAnnealing, + PolynomialHoldDecayAnnealing, + ] lr_policies = [p(total_steps=total_steps) for p in lr_policy_cls] for step in range(1000): for p in lr_policies: assert p(1e-3, step, 0) > 0 + @pytest.mark.unit def test_warmup(self): policy = SquareAnnealing(100, warmup_ratio=0.5) lr1, lr2, lr3 = (policy(1e-3, x, 0) for x in (0, 50, 100)) self.assertTrue(lr1 < lr2) self.assertTrue(lr2 > lr3) + + @pytest.mark.unit + def test_warmup_hold(self): + policy = PolynomialHoldDecayAnnealing(1000, warmup_ratio=0.25, hold_ratio=0.25, power=2) + lr1, lr2, lr3, lr4 = (policy(1e-3, x, 0) for x in (0, 250, 500, 1000)) + self.assertTrue(lr1 < lr2) + self.assertTrue(lr2 == lr3) + self.assertTrue(lr4 < lr3) + self.assertTrue(lr4 == 0.0) diff --git a/tests/core/test_weight_share.py b/tests/unit/core/test_weight_share.py similarity index 93% rename from tests/core/test_weight_share.py rename to tests/unit/core/test_weight_share.py index 6317052ae77d..165db51f923b 100644 --- a/tests/core/test_weight_share.py +++ b/tests/unit/core/test_weight_share.py @@ -20,24 +20,25 @@ import shutil import tarfile from typing import Dict +from unittest import TestCase import numpy as np +import pytest import torch from ruamel.yaml import YAML import nemo import nemo.collections.asr as nemo_asr from nemo.backends.pytorch.nm import DataLayerNM -from nemo.collections.nlp.nm.losses import PaddedSmoothedCrossEntropyLossNM +from nemo.collections.nlp.nm.losses import SmoothedCrossEntropyLoss from nemo.collections.nlp.nm.trainables.common import TokenClassifier from nemo.core import WeightShareTransform from nemo.core.neural_types import * -from tests.common_setup import NeMoUnitTest +from nemo.utils import logging -logging = nemo.logging - -class TestWeightSharing(NeMoUnitTest): +@pytest.mark.usefixtures("neural_factory") +class TestWeightSharing(TestCase): labels = [ "'", "a", @@ -68,7 +69,7 @@ class TestWeightSharing(NeMoUnitTest): "z", " ", ] - manifest_filepath = os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/asr/an4_train.json")) + manifest_filepath = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../data/asr/an4_train.json")) featurizer_config = { 'window': 'hann', 'dither': 1e-05, @@ -86,7 +87,7 @@ class TestWeightSharing(NeMoUnitTest): @classmethod def setUpClass(cls) -> None: super().setUpClass() - data_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/")) + data_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../data/")) logging.info("Looking up for test ASR data") if not os.path.exists(os.path.join(data_folder, 
"asr")): logging.info("Extracting ASR data to: {0}".format(os.path.join(data_folder, "asr"))) @@ -116,6 +117,7 @@ def __check_if_weights_are_equal(self, w1: Dict, w2: Dict): ) return all_same + @pytest.mark.unit def test_TaylorNet_get_weights(self): tn1 = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) tn2 = nemo.backends.pytorch.tutorials.TaylorNet(dim=4) @@ -139,6 +141,7 @@ def test_TaylorNet_get_weights(self): # tn2.fc1.bias.data = torch.tensor([0.1]) # self.assertTrue(self.__check_if_weights_are_equal(tn1.get_weights(), tn2.get_weights())) + @pytest.mark.unit def test_tie_weights(self): class DummyDataLayer(DataLayerNM): def __init__(self, vocab_size): @@ -181,7 +184,7 @@ def data_iterator(self): embd = nemo.backends.pytorch.common.other.SequenceEmbedding(voc_size=voc_size, hidden_size=dim) proj = TokenClassifier(hidden_size=dim, num_classes=voc_size) data = DummyDataLayer(voc_size) - loss = PaddedSmoothedCrossEntropyLossNM(0) + loss = SmoothedCrossEntropyLoss(pad_id=0) embd.tie_weights_with( proj, weight_names=["embedding.weight"], @@ -193,9 +196,10 @@ def data_iterator(self): _in, _out = data() pred = embd(input_seq=_in) pred = proj(hidden_states=pred) - loss_t = loss(target_ids=_out, logits=pred) + loss_t = loss(labels=_out, logits=pred) - self.nf.train( + optimizer = nemo.backends.pytorch.actions.PtActions() + optimizer.train( [loss_t], optimizer="sgd", optimization_params={"max_steps": 5, "lr": 0.0003}, ) @@ -203,6 +207,7 @@ def data_iterator(self): np.array_equal(embd.embedding.weight.detach().cpu().numpy(), proj.mlp.layer2.weight.detach().cpu().numpy()) ) + @pytest.mark.unit def test_untied_weights(self): class DummyDataLayer(DataLayerNM): def __init__(self, vocab_size): @@ -245,7 +250,7 @@ def data_iterator(self): embd = nemo.backends.pytorch.common.other.SequenceEmbedding(voc_size=voc_size, hidden_size=dim) proj = TokenClassifier(hidden_size=dim, num_classes=voc_size) data = DummyDataLayer(voc_size) - loss = PaddedSmoothedCrossEntropyLossNM(0) + loss = SmoothedCrossEntropyLoss(pad_id=0) # embd.tie_weights_with( # proj, # weight_names=["embedding.weight"], @@ -257,9 +262,10 @@ def data_iterator(self): _in, _out = data() pred = embd(input_seq=_in) pred = proj(hidden_states=pred) - loss_t = loss(target_ids=_out, logits=pred) + loss_t = loss(labels=_out, logits=pred) - self.nf.train( + optimizer = nemo.backends.pytorch.actions.PtActions() + optimizer.train( [loss_t], optimizer="sgd", optimization_params={"max_steps": 5, "lr": 0.0003}, ) @@ -267,6 +273,7 @@ def data_iterator(self): np.array_equal(embd.embedding.weight.detach().cpu().numpy(), proj.mlp.layer2.weight.detach().cpu().numpy()) ) + @pytest.mark.unit def test_set_weights(self): voc_size = 3 dim = 2 @@ -278,8 +285,10 @@ def test_set_weights(self): weights = torch.tensor(np.random.randint(0, 10, (3, 2)) * 1.0) self.assertFalse(np.array_equal(embd.embedding.weight.detach().cpu().numpy(), weights.detach().cpu().numpy())) + @pytest.mark.unit + @pytest.mark.run_only_on('GPU') def test_freeze_unfreeze_TrainableNM(self): - path = os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/jasper_smaller.yaml")) + path = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../data/jasper_smaller.yaml")) with open(path) as file: jasper_model_definition = self.yaml.load(file) dl = nemo_asr.AudioToTextDataLayer( @@ -326,7 +335,7 @@ def test_freeze_unfreeze_TrainableNM(self): callback = nemo.core.SimpleLossLoggerCallback( tensors=[loss], print_func=lambda x: logging.info(f'Train Loss: {str(x[0].item())}'), ) - optimizer = 
self.nf.get_trainer() + optimizer = nemo.backends.pytorch.actions.PtActions() optimizer.train( [loss], callbacks=[callback], optimizer="sgd", optimization_params={"max_steps": 5, "lr": 0.0003}, ) diff --git a/tests/nlp/test_bert.py b/tests/unit/test_huggingface.py similarity index 90% rename from tests/nlp/test_bert.py rename to tests/unit/test_huggingface.py index e84a679e2507..c73618c61c33 100644 --- a/tests/nlp/test_bert.py +++ b/tests/unit/test_huggingface.py @@ -16,11 +16,15 @@ # limitations under the License. # ============================================================================= +from unittest import TestCase + +import pytest + import nemo.collections.nlp as nemo_nlp -from tests.common_setup import NeMoUnitTest -class TestBert(NeMoUnitTest): +class TestHuggingFace(TestCase): + @pytest.mark.unit def test_list_pretrained_models(self): pretrained_models = nemo_nlp.nm.trainables.huggingface.BERT.list_pretrained_models() self.assertTrue(len(pretrained_models) > 0) diff --git a/tests/nlp/test_spc_tokenizer.py b/tests/unit/test_spc_tokenizer.py similarity index 84% rename from tests/nlp/test_spc_tokenizer.py rename to tests/unit/test_spc_tokenizer.py index e291ce267d92..4a6bbae06453 100644 --- a/tests/nlp/test_spc_tokenizer.py +++ b/tests/unit/test_spc_tokenizer.py @@ -16,21 +16,26 @@ # limitations under the License. # ============================================================================= +from unittest import TestCase + +import pytest + import nemo.collections.nlp as nemo_nlp from nemo.collections.nlp.data import SentencePieceTokenizer -from tests.common_setup import NeMoUnitTest -class TestSPCTokenizer(NeMoUnitTest): +class TestSPCTokenizer(TestCase): + @pytest.mark.unit def test_add_special_tokens(self): tokenizer = SentencePieceTokenizer("./tests/data/m_common.model") - special_tokens = nemo_nlp.utils.MODEL_SPECIAL_TOKENS['bert'] + special_tokens = nemo_nlp.data.tokenizers.MODEL_SPECIAL_TOKENS['bert'] tokenizer.add_special_tokens(special_tokens) self.assertTrue(tokenizer.vocab_size == tokenizer.original_vocab_size + len(set(special_tokens.values()))) + @pytest.mark.unit def test_text_to_tokens(self): tokenizer = SentencePieceTokenizer("./tests/data/m_common.model") - special_tokens = nemo_nlp.utils.MODEL_SPECIAL_TOKENS['bert'] + special_tokens = nemo_nlp.data.tokenizers.MODEL_SPECIAL_TOKENS['bert'] tokenizer.add_special_tokens(special_tokens) text = "[CLS] a b c [MASK] e f [SEP] g h i [SEP]" @@ -41,6 +46,7 @@ def test_text_to_tokens(self): self.assertTrue(tokens.count("[MASK]") == 1) self.assertTrue(tokens.count("[SEP]") == 2) + @pytest.mark.unit def test_tokens_to_text(self): tokenizer = SentencePieceTokenizer("./tests/data/m_common.model") @@ -50,9 +56,10 @@ def test_tokens_to_text(self): self.assertTrue(text == result) + @pytest.mark.unit def test_text_to_ids(self): tokenizer = SentencePieceTokenizer("./tests/data/m_common.model") - special_tokens = nemo_nlp.utils.MODEL_SPECIAL_TOKENS['bert'] + special_tokens = nemo_nlp.data.tokenizers.MODEL_SPECIAL_TOKENS['bert'] tokenizer.add_special_tokens(special_tokens) text = "[CLS] a b c [MASK] e f [SEP] g h i [SEP]" @@ -63,9 +70,10 @@ def test_text_to_ids(self): self.assertTrue(ids.count(tokenizer.token_to_id("[MASK]")) == 1) self.assertTrue(ids.count(tokenizer.token_to_id("[SEP]")) == 2) + @pytest.mark.unit def test_ids_to_text(self): tokenizer = SentencePieceTokenizer("./tests/data/m_common.model") - special_tokens = nemo_nlp.utils.MODEL_SPECIAL_TOKENS['bert'] + special_tokens = 
nemo_nlp.data.tokenizers.MODEL_SPECIAL_TOKENS['bert'] tokenizer.add_special_tokens(special_tokens) text = "[CLS] a b c [MASK] e f [SEP] g h i [SEP]" @@ -74,9 +82,10 @@ def test_ids_to_text(self): self.assertTrue(text == result) + @pytest.mark.unit def test_tokens_to_ids(self): tokenizer = SentencePieceTokenizer("./tests/data/m_common.model") - special_tokens = nemo_nlp.utils.MODEL_SPECIAL_TOKENS['bert'] + special_tokens = nemo_nlp.data.tokenizers.MODEL_SPECIAL_TOKENS['bert'] tokenizer.add_special_tokens(special_tokens) text = "[CLS] a b c [MASK] e f [SEP] g h i [SEP]" @@ -88,9 +97,10 @@ def test_tokens_to_ids(self): self.assertTrue(ids.count(tokenizer.token_to_id("[MASK]")) == 1) self.assertTrue(ids.count(tokenizer.token_to_id("[SEP]")) == 2) + @pytest.mark.unit def test_ids_to_tokens(self): tokenizer = SentencePieceTokenizer("./tests/data/m_common.model") - special_tokens = nemo_nlp.utils.MODEL_SPECIAL_TOKENS['bert'] + special_tokens = nemo_nlp.data.tokenizers.MODEL_SPECIAL_TOKENS['bert'] tokenizer.add_special_tokens(special_tokens) text = "[CLS] a b c [MASK] e f [SEP] g h i [SEP]" diff --git a/tests/unit/test_torch_backend.py b/tests/unit/test_torch_backend.py new file mode 100644 index 000000000000..f8ba63cf86b7 --- /dev/null +++ b/tests/unit/test_torch_backend.py @@ -0,0 +1,71 @@ +# ! /usr/bin/python +# -*- coding: utf-8 -*- + +# ============================================================================= +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +import pytest +from numpy import array_equal + +from nemo.backends import get_state_dict, load, save, set_state_dict +from nemo.backends.pytorch.tutorials import TaylorNet + + +@pytest.mark.usefixtures("neural_factory") +class TestTorchBackend: + @pytest.mark.unit + def test_state_dict(self): + """ + Tests whether the get/set_state_dict proxy functions work properly. + """ + # Module. + fx = TaylorNet(dim=4) + + # Get state dict. + state_dict1 = get_state_dict(fx) + + # Set state dict. + set_state_dict(fx, state_dict1) + + # Compare state dicts. + state_dict2 = get_state_dict(fx) + for key in state_dict1.keys(): + assert array_equal(state_dict1[key].cpu().numpy(), state_dict2[key].cpu().numpy()) + + @pytest.mark.unit + def test_save_load(self, tmpdir): + """ + Tests whether the save and load proxy functions work properly. + + Args: + tmpdir: Fixture which will provide a temporary directory. + """ + # Module. + fx = TaylorNet(dim=4) + + # Generate filename in the temporary directory. + tmp_file_name = str(tmpdir.join("tsl_taylornet.chkpt")) + + # Save. + weights = get_state_dict(fx) + save(weights, tmp_file_name) + + # Load. + loaded_weights = load(tmp_file_name) + + # Compare state dicts. 
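+        # Every tensor saved with save() should come back from load() unchanged.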
+ for key in weights: + assert array_equal(weights[key].cpu().numpy(), loaded_weights[key].cpu().numpy()) diff --git a/tests/nlp/test_tutorials_pytorch.py b/tests/unit/test_tutorials_cornell_data.py similarity index 79% rename from tests/nlp/test_tutorials_pytorch.py rename to tests/unit/test_tutorials_cornell_data.py index a87dd9f0d4f6..dce45d18229e 100644 --- a/tests/nlp/test_tutorials_pytorch.py +++ b/tests/unit/test_tutorials_cornell_data.py @@ -16,15 +16,17 @@ # limitations under the License. # ============================================================================= -from nemo import logging +from unittest import TestCase + +import pytest + from nemo.backends.pytorch.tutorials.chatbot.data import loadPrepareData -from tests.common_setup import NeMoUnitTest -class TestPytorchChatBotTutorial(NeMoUnitTest): - def test_simple_train(self): +class TestTutorialCornellData(TestCase): + @pytest.mark.unit + def test_data_preparation(self): datafile = "tests/data/dialog_sample.txt" - logging.info(datafile) - voc, pairs = loadPrepareData("cornell", datafile=datafile) + voc, _ = loadPrepareData("cornell", datafile=datafile) self.assertEqual(voc.name, 'cornell') self.assertEqual(voc.num_words, 675) diff --git a/tests/unit/test_unit_asr.py b/tests/unit/test_unit_asr.py new file mode 100644 index 000000000000..ff6cc6985878 --- /dev/null +++ b/tests/unit/test_unit_asr.py @@ -0,0 +1,367 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# ============================================================================= +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================= + +import os +import shutil +import tarfile +import unittest +from unittest import TestCase + +import pytest +from ruamel.yaml import YAML + +import nemo +import nemo.collections.asr as nemo_asr +from nemo.collections.asr.parts import AudioDataset, WaveformFeaturizer, collections, parsers +from nemo.core import DeviceType +from nemo.utils import logging + +freq = 16000 + + +@pytest.mark.usefixtures("neural_factory") +class TestUnitASRPytorch(TestCase): + labels = [ + " ", + "a", + "b", + "c", + "d", + "e", + "f", + "g", + "h", + "i", + "j", + "k", + "l", + "m", + "n", + "o", + "p", + "q", + "r", + "s", + "t", + "u", + "v", + "w", + "x", + "y", + "z", + "'", + ] + manifest_filepath = os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/asr/an4_train.json")) + featurizer_config = { + 'window': 'hann', + 'dither': 1e-05, + 'normalize': 'per_feature', + 'frame_splicing': 1, + 'int_values': False, + 'window_stride': 0.01, + 'sample_rate': freq, + 'features': 64, + 'n_fft': 512, + 'window_size': 0.02, + } + yaml = YAML(typ="safe") + + @classmethod + def setUpClass(cls) -> None: + super().setUpClass() + data_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/")) + logging.info("Looking up for test ASR data") + if not os.path.exists(os.path.join(data_folder, "asr")): + logging.info("Extracting ASR data to: {0}".format(os.path.join(data_folder, "asr"))) + tar = tarfile.open(os.path.join(data_folder, "asr.tar.gz"), "r:gz") + tar.extractall(path=data_folder) + tar.close() + else: + logging.info("ASR data found in: {0}".format(os.path.join(data_folder, "asr"))) + + @pytest.mark.unit + def test_transcript_normalizers(self): + # Create test json + test_strings = [ + "TEST CAPITALIZATION", + '!\\"#$%&\'()*+,-./:;<=>?@[\\\\]^_`{|}~', + "3+3=10", + "3 + 3 = 10", + "why is \\t whitepsace\\tsuch a problem why indeed", + "\\\"Can you handle quotes?,\\\" says the boy", + "I Jump!!!!With joy?Now.", + "Maybe I want to learn periods.", + "$10 10.90 1-800-000-0000", + "18000000000 one thousand 2020", + "1 10 100 1000 10000 100000 1000000", + "Î ĻƠvɆȩȅĘ ÀÁÃ Ą ÇĊňńŤŧș", + "‘’“”❛❜❝❞「 」 〈 〉 《 》 【 】 〔 〕 ⦗ ⦘ 😙 👀 🔨", + "It only costs $1 000 000! Cheap right?", + "2500, 3000 are separate but 200, 125 is not", + "1", + "1 2", + "1 2 3", + "10:00pm is 10:00 pm is 22:00 but not 10: 00 pm", + "10:00 10:01pm 10:10am 10:90pm", + "Mr. 
Expand me!", + "Mr Don't Expand me!", + ] + normalized_strings = [ + "test capitalization", + 'percent and \' plus', + "three plus three ten", + "three plus three ten", + "why is whitepsace such a problem why indeed", + "can you handle quotes says the boy", + "i jump with joy now", + "maybe i want to learn periods", + "ten dollars ten point nine zero one eight hundred zero zero", + "eighteen billion one thousand two thousand and twenty", + # Two line string below + "one ten thousand one hundred one thousand ten thousand one hundred thousand one million", + "i loveeee aaa a ccnntts", + "''", + "it only costs one million dollars cheap right", + # Two line string below + "two thousand five hundred three thousand are separate but two " + "hundred thousand one hundred and twenty five is not", + "one", + "one two", + "one two three", + "ten pm is ten pm is twenty two but not ten zero pm", + "ten ten one pm ten ten am ten ninety pm", + "mister expand me", + "mr don't expand me", + ] + manifest_paths = os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/asr/manifest_test.json")) + + def remove_test_json(): + os.remove(manifest_paths) + + self.addCleanup(remove_test_json) + + with open(manifest_paths, "w") as f: + for s in test_strings: + f.write('{"audio_filepath": "", "duration": 1.0, "text": ' f'"{s}"}}\n') + parser = parsers.make_parser(self.labels, 'en') + manifest = collections.ASRAudioText(manifests_files=[manifest_paths], parser=parser,) + + for i, s in enumerate(normalized_strings): + self.assertTrue(manifest[i].text_tokens == parser(s)) + + @pytest.mark.unit + def test_pytorch_audio_dataset(self): + featurizer = WaveformFeaturizer.from_config(self.featurizer_config) + ds = AudioDataset(manifest_filepath=self.manifest_filepath, labels=self.labels, featurizer=featurizer,) + + for i in range(len(ds)): + if i == 5: + logging.info(ds[i]) + # logging.info(ds[i][0].shape) + # self.assertEqual(freq, ds[i][0].shape[0]) + + @pytest.mark.unit + def test_dataloader(self): + batch_size = 4 + dl = nemo_asr.AudioToTextDataLayer( + # featurizer_config=self.featurizer_config, + manifest_filepath=self.manifest_filepath, + labels=self.labels, + batch_size=batch_size, + # placement=DeviceType.GPU, + drop_last=True, + ) + for ind, data in enumerate(dl.data_iterator): + # With num_workers update, this is no longer true + # Moving to GPU is handled by AudioPreprocessor + # data is on GPU + # self.assertTrue(data[0].is_cuda) + # self.assertTrue(data[1].is_cuda) + # self.assertTrue(data[2].is_cuda) + # self.assertTrue(data[3].is_cuda) + # first dimension is batch + self.assertTrue(data[0].size(0) == batch_size) + self.assertTrue(data[1].size(0) == batch_size) + self.assertTrue(data[2].size(0) == batch_size) + self.assertTrue(data[3].size(0) == batch_size) + + @pytest.mark.unit + def test_preprocessor_errors(self): + def create_broken_preprocessor_1(): + nemo_asr.AudioToMelSpectrogramPreprocessor(window_size=2, n_window_size=2) + + def create_broken_preprocessor_2(): + nemo_asr.AudioToMelSpectrogramPreprocessor(window_stride=2, n_window_stride=2) + + def create_broken_preprocessor_3(): + nemo_asr.AudioToMelSpectrogramPreprocessor(n_window_stride=2) + + def create_good_preprocessor_1(): + nemo_asr.AudioToMelSpectrogramPreprocessor(window_size=0.02, window_stride=0.01) + + def create_good_preprocessor_2(): + nemo_asr.AudioToMelSpectrogramPreprocessor( + window_size=None, window_stride=None, n_window_size=256, n_window_stride=32, + ) + + self.assertRaises(ValueError, create_broken_preprocessor_1) + 
self.assertRaises(ValueError, create_broken_preprocessor_2) + self.assertRaises(ValueError, create_broken_preprocessor_3) + create_good_preprocessor_1() + create_good_preprocessor_2() + + @pytest.mark.unit + def test_kaldi_dataloader(self): + batch_size = 4 + dl = nemo_asr.KaldiFeatureDataLayer( + kaldi_dir=os.path.abspath(os.path.join(os.path.dirname(__file__), '../data/asr/kaldi_an4/')), + labels=self.labels, + batch_size=batch_size, + ) + for data in dl.data_iterator: + self.assertTrue(data[0].size(0) == batch_size) + + dl_test_min = nemo_asr.KaldiFeatureDataLayer( + kaldi_dir=os.path.abspath(os.path.join(os.path.dirname(__file__), '../data/asr/kaldi_an4/')), + labels=self.labels, + batch_size=batch_size, + min_duration=1.0, + ) + self.assertTrue(len(dl_test_min) == 18) + + dl_test_max = nemo_asr.KaldiFeatureDataLayer( + kaldi_dir=os.path.abspath(os.path.join(os.path.dirname(__file__), '../data/asr/kaldi_an4/')), + labels=self.labels, + batch_size=batch_size, + max_duration=5.0, + ) + self.assertTrue(len(dl_test_max) == 19) + + @pytest.mark.unit + def test_tarred_dataloader(self): + batch_size = 4 + manifest_path = os.path.abspath( + os.path.join(os.path.dirname(__file__), '../data/asr/tarred_an4/tarred_audio_manifest.json') + ) + + # Test loading a single tarball + tarpath = os.path.abspath(os.path.join(os.path.dirname(__file__), '../data/asr/tarred_an4/audio_0.tar')) + dl_single_tar = nemo_asr.TarredAudioToTextDataLayer( + audio_tar_filepaths=tarpath, manifest_filepath=manifest_path, labels=self.labels, batch_size=batch_size + ) + count = 0 + for _ in dl_single_tar.dataset: + count += 1 + self.assertTrue(count == 16) + + # Test braceexpand loading + tarpath = os.path.abspath(os.path.join(os.path.dirname(__file__), '../data/asr/tarred_an4/audio_{0..3}.tar')) + dl_braceexpand = nemo_asr.TarredAudioToTextDataLayer( + audio_tar_filepaths=tarpath, manifest_filepath=manifest_path, labels=self.labels, batch_size=batch_size + ) + self.assertTrue(len(dl_braceexpand) == 65) + count = 0 + for _ in dl_braceexpand.dataset: + count += 1 + self.assertTrue(count == 65) + + # Test loading via list + tarpath = [ + os.path.abspath(os.path.join(os.path.dirname(__file__), f'../data/asr/tarred_an4/audio_{i}.tar')) + for i in range(4) + ] + dl_list_load = nemo_asr.TarredAudioToTextDataLayer( + audio_tar_filepaths=tarpath, manifest_filepath=manifest_path, labels=self.labels, batch_size=batch_size + ) + count = 0 + for _ in dl_list_load.dataset: + count += 1 + self.assertTrue(count == 65) + + @pytest.mark.unit + def test_trim_silence(self): + batch_size = 4 + normal_dl = nemo_asr.AudioToTextDataLayer( + # featurizer_config=self.featurizer_config, + manifest_filepath=self.manifest_filepath, + labels=self.labels, + batch_size=batch_size, + # placement=DeviceType.GPU, + drop_last=True, + shuffle=False, + ) + trimmed_dl = nemo_asr.AudioToTextDataLayer( + # featurizer_config=self.featurizer_config, + manifest_filepath=self.manifest_filepath, + trim_silence=True, + labels=self.labels, + batch_size=batch_size, + # placement=DeviceType.GPU, + drop_last=True, + shuffle=False, + ) + for norm, trim in zip(normal_dl.data_iterator, trimmed_dl.data_iterator): + for point in range(batch_size): + self.assertTrue(norm[1][point].data >= trim[1][point].data) + + @pytest.mark.unit + def test_audio_preprocessors(self): + batch_size = 5 + dl = nemo_asr.AudioToTextDataLayer( + # featurizer_config=self.featurizer_config, + manifest_filepath=self.manifest_filepath, + labels=self.labels, + batch_size=batch_size, + # 
placement=DeviceType.GPU, + drop_last=True, + shuffle=False, + ) + + installed_torchaudio = True + try: + import torchaudio + except ModuleNotFoundError: + installed_torchaudio = False + with self.assertRaises(ModuleNotFoundError): + to_spectrogram = nemo_asr.AudioToSpectrogramPreprocessor(n_fft=400, window=None) + with self.assertRaises(ModuleNotFoundError): + to_mfcc = nemo_asr.AudioToMFCCPreprocessor(n_mfcc=15) + + if installed_torchaudio: + to_spectrogram = nemo_asr.AudioToSpectrogramPreprocessor(n_fft=400, window=None) + to_mfcc = nemo_asr.AudioToMFCCPreprocessor(n_mfcc=15) + + to_melspec = nemo_asr.AudioToMelSpectrogramPreprocessor(features=50) + + for batch in dl.data_iterator: + input_signals, seq_lengths, _, _ = batch + input_signals = input_signals.to(to_melspec._device) + seq_lengths = seq_lengths.to(to_melspec._device) + + melspec = to_melspec.forward(input_signals, seq_lengths) + + if installed_torchaudio: + spec = to_spectrogram.forward(input_signals, seq_lengths) + mfcc = to_mfcc.forward(input_signals, seq_lengths) + + # Check that number of features is what we expect + self.assertTrue(melspec[0].shape[1] == 50) + + if installed_torchaudio: + self.assertTrue(spec[0].shape[1] == 201) # n_fft // 2 + 1 bins + self.assertTrue(mfcc[0].shape[1] == 15) diff --git a/tests/unit/test_unit_multidataset.py b/tests/unit/test_unit_multidataset.py new file mode 100644 index 000000000000..1ef74caeadaf --- /dev/null +++ b/tests/unit/test_unit_multidataset.py @@ -0,0 +1,164 @@ +# ! /usr/bin/python +# -*- coding: utf-8 -*- + +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================= + +import os +import shutil +from unittest import TestCase + +import pytest +import torch + +import nemo +from nemo.backends.pytorch.common import DataCombination +from nemo.core import ChannelType, NeuralType +from nemo.utils import logging + + +@pytest.mark.usefixtures("neural_factory") +class TestMultiDLUnit(TestCase): + @classmethod + def setUpClass(cls) -> None: + super().setUpClass() + + @pytest.mark.unit + def test_port_name_collision_handling(self): + batch_size = 4 + dataset_size = 4 + shuffle = False + dl_1 = nemo.backends.pytorch.common.ZerosDataLayer( + size=dataset_size, + dtype=torch.FloatTensor, + batch_size=batch_size, + output_ports={"a": NeuralType(('B', 'T'), ChannelType()), "b": NeuralType(('B', 'T'), ChannelType())}, + ) + dl_2 = nemo.backends.pytorch.common.ZerosDataLayer( + size=dataset_size, + dtype=torch.FloatTensor, + batch_size=batch_size, + output_ports={"a": NeuralType(('B', 'T'), ChannelType()), "c": NeuralType(('B', 'T'), ChannelType())}, + ) + + data_layer = nemo.backends.pytorch.common.MultiDataLayer( + data_layers=[dl_1, dl_2], + batch_size=batch_size, + shuffle=shuffle, + combination_mode=DataCombination.CROSSPRODUCT, + ) + self.assertEqual([*data_layer.output_ports], ["a", "b", "a_1", "c"]) + self.assertEqual(len(data_layer), dataset_size * dataset_size) + + @pytest.mark.unit + def test_port_renaming(self): + batch_size = 4 + dataset_size = 4 + shuffle = False + dl_1 = nemo.backends.pytorch.common.ZerosDataLayer( + size=dataset_size, + dtype=torch.FloatTensor, + batch_size=batch_size, + output_ports={"a": NeuralType(('B', 'T'), ChannelType()), "b": NeuralType(('B', 'T'), ChannelType())}, + ) + dl_2 = nemo.backends.pytorch.common.ZerosDataLayer( + size=dataset_size, + dtype=torch.FloatTensor, + batch_size=batch_size, + output_ports={"a": NeuralType(('B', 'T'), ChannelType()), "b": NeuralType(('B', 'T'), ChannelType())}, + ) + + data_layer = nemo.backends.pytorch.common.MultiDataLayer( + data_layers=[dl_1, dl_2], + batch_size=batch_size, + shuffle=shuffle, + combination_mode=DataCombination.CROSSPRODUCT, + port_names=["1", "2", "3", "4"], + ) + self.assertEqual([*data_layer.output_ports], ["1", "2", "3", "4"]) + + @pytest.mark.unit + def test_multi_dl_zip_working(self): + dataset_size_0 = 2 + dataset_size_1 = 2 + final_dataset_size = 2 + batch_size = 4 + shuffle = False + dl_1 = nemo.backends.pytorch.common.ZerosDataLayer( + size=dataset_size_0, + dtype=torch.FloatTensor, + batch_size=batch_size, + output_ports={"a": NeuralType(('B', 'T'), ChannelType()), "b": NeuralType(('B', 'T'), ChannelType())}, + ) + dl_2 = nemo.backends.pytorch.common.ZerosDataLayer( + size=dataset_size_1, + dtype=torch.FloatTensor, + batch_size=batch_size, + output_ports={"a": NeuralType(('B', 'T'), ChannelType()), "c": NeuralType(('B', 'T'), ChannelType())}, + ) + + data_layer = nemo.backends.pytorch.common.MultiDataLayer( + data_layers=[dl_1, dl_2], batch_size=batch_size, shuffle=shuffle, combination_mode=DataCombination.ZIP + ) + self.assertEqual(len(data_layer), final_dataset_size) + + @pytest.mark.unit + def test_multi_dl_zip_failing(self): + dataset_size_0 = 4 + dataset_size_1 = 2 + batch_size = 4 + shuffle = False + dl_1 = nemo.backends.pytorch.common.ZerosDataLayer( + size=dataset_size_0, + dtype=torch.FloatTensor, + batch_size=batch_size, + output_ports={"a": NeuralType(('B', 'T'), ChannelType()), "b": NeuralType(('B', 'T'), ChannelType())}, + ) + dl_2 = 
nemo.backends.pytorch.common.ZerosDataLayer( + size=dataset_size_1, + dtype=torch.FloatTensor, + batch_size=batch_size, + output_ports={"a": NeuralType(('B', 'T'), ChannelType()), "c": NeuralType(('B', 'T'), ChannelType())}, + ) + + with pytest.raises(ValueError): + data_layer = nemo.backends.pytorch.common.MultiDataLayer( + data_layers=[dl_1, dl_2], batch_size=batch_size, shuffle=shuffle, combination_mode=DataCombination.ZIP + ) + + @pytest.mark.unit + def test_multi_dl_wrong_combination(self): + dataset_size_0 = 2 + dataset_size_1 = 2 + unknown_combination = "cross" + batch_size = 4 + shuffle = False + dl_1 = nemo.backends.pytorch.common.ZerosDataLayer( + size=dataset_size_0, + dtype=torch.FloatTensor, + batch_size=batch_size, + output_ports={"a": NeuralType(('B', 'T'), ChannelType()), "b": NeuralType(('B', 'T'), ChannelType())}, + ) + dl_2 = nemo.backends.pytorch.common.ZerosDataLayer( + size=dataset_size_1, + dtype=torch.FloatTensor, + batch_size=batch_size, + output_ports={"a": NeuralType(('B', 'T'), ChannelType()), "c": NeuralType(('B', 'T'), ChannelType())}, + ) + + with pytest.raises(ValueError): + data_layer = nemo.backends.pytorch.common.MultiDataLayer( + data_layers=[dl_1, dl_2], batch_size=batch_size, shuffle=shuffle, combination_mode=unknown_combination + ) diff --git a/tests/unit/test_unit_speech_commands.py b/tests/unit/test_unit_speech_commands.py new file mode 100644 index 000000000000..3077c08708b1 --- /dev/null +++ b/tests/unit/test_unit_speech_commands.py @@ -0,0 +1,262 @@ +# ! /usr/bin/python +# -*- coding: utf-8 -*- + +# Copyright 2020 NVIDIA. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================= +import os +import shutil +import tarfile +import unittest +from unittest import TestCase + +import numpy as np +import pytest +from ruamel.yaml import YAML + +import nemo +import nemo.collections.asr as nemo_asr +from nemo.collections.asr.parts import AudioLabelDataset, WaveformFeaturizer, collections, parsers, perturb +from nemo.core import DeviceType +from nemo.utils import logging + +freq = 16000 + + +@pytest.mark.usefixtures("neural_factory") +class TestSpeechCommandsPytorch(TestCase): + labels = [ + "cat", + "dog", + ] + manifest_filepath = os.path.abspath( + os.path.join(os.path.dirname(__file__), "../data/speech_commands/train_manifest.json") + ) + featurizer_config = { + 'window': 'hann', + 'dither': 1e-05, + 'normalize': 'per_feature', + 'frame_splicing': 1, + 'int_values': False, + 'window_stride': 0.01, + 'sample_rate': freq, + 'features': 64, + 'n_fft': 512, + 'window_size': 0.02, + } + yaml = YAML(typ="safe") + + @classmethod + def setUpClass(cls) -> None: + super().setUpClass() + data_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/")) + logging.info("Looking up for test speech command data") + if not os.path.exists(os.path.join(data_folder, "speech_commands")): + logging.info( + "Extracting speech commands data to: {0}".format(os.path.join(data_folder, "speech_commands")) + ) + tar = tarfile.open(os.path.join(data_folder, "speech_commands.tar.xz"), "r:xz") + tar.extractall(path=data_folder) + tar.close() + else: + logging.info("Speech Command data found in: {0}".format(os.path.join(data_folder, "speech_commands"))) + + @classmethod + def tearDownClass(cls) -> None: + super().tearDownClass() + data_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "../data/")) + logging.info("Looking up for test ASR data") + if os.path.exists(os.path.join(data_folder, "speech_commands")): + shutil.rmtree(os.path.join(data_folder, "speech_commands")) + + @pytest.mark.unit + def test_pytorch_audio_dataset_with_perturbation(self): + def construct_perturbed_dataset(perturbation): + if perturbation is not None: + # Execute perturbations with 100% probability + prob_perturb = [(1.0, perturbation)] + audio_augmentor = perturb.AudioAugmentor(prob_perturb) + else: + audio_augmentor = None + + featurizer = WaveformFeaturizer( + sample_rate=self.featurizer_config['sample_rate'], + int_values=self.featurizer_config['int_values'], + augmentor=audio_augmentor, + ) + + ds = AudioLabelDataset(manifest_filepath=self.manifest_filepath, labels=self.labels, featurizer=featurizer) + return ds + + baseline_ds = construct_perturbed_dataset(perturbation=None) + num_samples = len(baseline_ds) + + # test white noise perturbation + white_noise_perturbation = perturb.WhiteNoisePerturbation(min_level=-90, max_level=-46) + white_noise_ds = construct_perturbed_dataset(white_noise_perturbation) + max_range = 10.0 ** (-46 / 20.0) + min_range = 10.0 ** (-90 / 20.0) + rng = np.random.RandomState(0) + + for i in range(num_samples): + xp = white_noise_ds[i][0] + xp_max = rng.randn(xp.shape[0]) * max_range + xp_min = rng.randn(xp.shape[0]) * min_range + + # Compute z statistic + z_max = (xp.mean() - xp_max.mean()) / np.sqrt(np.square(xp.std()) + np.square(xp_max.std())) + z_min = (xp.mean() - xp_min.mean()) / np.sqrt(np.square(xp.std()) + np.square(xp_min.std())) + self.assertTrue(z_max < 0.01) + self.assertTrue(z_min < 0.01) + + # test shift perturbation + shift_perturbation = 
perturb.ShiftPerturbation(min_shift_ms=-5.0, max_shift_ms=5.0) + shift_ds = construct_perturbed_dataset(shift_perturbation) + + for i in range(num_samples): + x = baseline_ds[i][0] + xp = shift_ds[i][0] + delta = np.abs(x - xp) + count_zeros = np.count_nonzero(delta == 0.0) + self.assertTrue(count_zeros >= 0) + + # test time stretch perturbation + ts_perturbation = perturb.TimeStretchPerturbation(min_speed_rate=0.9, max_speed_rate=1.1, num_rates=4) + timestretch_ds = construct_perturbed_dataset(ts_perturbation) + + for i in range(num_samples): + x = baseline_ds[i][0] + xp = timestretch_ds[i][0] + self.assertTrue((x.shape[0] > xp.shape[0]) or (x.shape[0] < xp.shape[0])) + + # test speed perturbation + speed_perturbation = perturb.SpeedPerturbation( + sr=self.featurizer_config['sample_rate'], + resample_type='kaiser_fast', + min_speed_rate=0.9, + max_speed_rate=1.1, + num_rates=4, + ) + speed_ds = construct_perturbed_dataset(speed_perturbation) + + for i in range(num_samples): + x = baseline_ds[i][0] + xp = speed_ds[i][0] + self.assertTrue((x.shape[0] > xp.shape[0]) or (x.shape[0] < xp.shape[0])) + + @pytest.mark.unit + def test_dataloader(self): + batch_size = 2 + dl = nemo_asr.AudioToSpeechLabelDataLayer( + # featurizer_config=self.featurizer_config, + manifest_filepath=self.manifest_filepath, + labels=self.labels, + batch_size=batch_size, + # placement=DeviceType.GPU, + sample_rate=16000, + ) + for ind, data in enumerate(dl.data_iterator): + # With num_workers update, this is no longer true + # Moving to GPU is handled by AudioPreprocessor + # data is on GPU + # self.assertTrue(data[0].is_cuda) + # self.assertTrue(data[1].is_cuda) + # self.assertTrue(data[2].is_cuda) + # self.assertTrue(data[3].is_cuda) + # first dimension is batch + self.assertTrue(data[0].size(0) == batch_size) + self.assertTrue(data[1].size(0) == batch_size) + self.assertTrue(data[2].size(0) == batch_size) + self.assertTrue(data[3].size(0) == batch_size) + + @pytest.mark.unit + def test_trim_silence(self): + batch_size = 2 + normal_dl = nemo_asr.AudioToSpeechLabelDataLayer( + # featurizer_config=self.featurizer_config, + manifest_filepath=self.manifest_filepath, + labels=self.labels, + batch_size=batch_size, + # placement=DeviceType.GPU, + drop_last=False, + shuffle=False, + ) + trimmed_dl = nemo_asr.AudioToSpeechLabelDataLayer( + # featurizer_config=self.featurizer_config, + manifest_filepath=self.manifest_filepath, + trim_silence=True, + labels=self.labels, + batch_size=batch_size, + # placement=DeviceType.GPU, + drop_last=False, + shuffle=False, + ) + for norm, trim in zip(normal_dl.data_iterator, trimmed_dl.data_iterator): + for point in range(batch_size): + self.assertTrue(norm[1][point].data >= trim[1][point].data) + + @pytest.mark.unit + def test_audio_preprocessors(self): + batch_size = 2 + dl = nemo_asr.AudioToSpeechLabelDataLayer( + # featurizer_config=self.featurizer_config, + manifest_filepath=self.manifest_filepath, + labels=self.labels, + batch_size=batch_size, + # placement=DeviceType.GPU, + drop_last=False, + shuffle=False, + ) + + installed_torchaudio = True + try: + import torchaudio + except ModuleNotFoundError: + installed_torchaudio = False + with self.assertRaises(ModuleNotFoundError): + to_spectrogram = nemo_asr.AudioToSpectrogramPreprocessor(n_fft=400, window=None) + with self.assertRaises(ModuleNotFoundError): + to_mfcc = nemo_asr.AudioToMFCCPreprocessor(n_mfcc=15) + + if installed_torchaudio: + to_spectrogram = nemo_asr.AudioToSpectrogramPreprocessor(n_fft=400, window=None) + to_mfcc = 
nemo_asr.AudioToMFCCPreprocessor(n_mfcc=15) + time_stretch_augment = nemo_asr.TimeStretchAugmentation( + self.featurizer_config['sample_rate'], probability=1.0, min_speed_rate=0.9, max_speed_rate=1.1 + ) + + to_melspec = nemo_asr.AudioToMelSpectrogramPreprocessor(features=50) + + for batch in dl.data_iterator: + input_signals, seq_lengths, _, _ = batch + input_signals = input_signals.to(to_melspec._device) + seq_lengths = seq_lengths.to(to_melspec._device) + + melspec = to_melspec.forward(input_signals, seq_lengths) + + if installed_torchaudio: + spec = to_spectrogram.forward(input_signals, seq_lengths) + mfcc = to_mfcc.forward(input_signals, seq_lengths) + ts_input_signals = time_stretch_augment.forward(input_signals, seq_lengths) + + # Check that number of features is what we expect + self.assertTrue(melspec[0].shape[1] == 50) + + if installed_torchaudio: + self.assertTrue(spec[0].shape[1] == 201) # n_fft // 2 + 1 bins + self.assertTrue(mfcc[0].shape[1] == 15) + + timesteps = ts_input_signals[0].shape[1] + self.assertTrue(timesteps <= int(1.15 * self.featurizer_config['sample_rate'])) + self.assertTrue(timesteps >= int(0.85 * self.featurizer_config['sample_rate'])) diff --git a/tests/common_setup.py b/tests/unit/utils/test_app_state.py similarity index 52% rename from tests/common_setup.py rename to tests/unit/utils/test_app_state.py index 296259e9fbd9..e872425cc012 100644 --- a/tests/common_setup.py +++ b/tests/unit/utils/test_app_state.py @@ -1,7 +1,7 @@ # ! /usr/bin/python # -*- coding: utf-8 -*- - -# Copyright 2020 NVIDIA. All Rights Reserved. +# ============================================================================= +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -16,20 +16,21 @@ # limitations under the License. # ============================================================================= -import unittest - -import nemo +import pytest -logging = nemo.logging +from nemo.utils.app_state import AppState -class NeMoUnitTest(unittest.TestCase): - def setUp(self) -> None: - """ Default setup - instantiates Neural Factory. """ - # Initialize the default Neural Factory - on GPU. - self.nf = nemo.core.NeuralModuleFactory(placement=nemo.core.DeviceType.GPU) - # Reset loggers. - self.nf._exp_manager.reset_loggers() +class TestAppState: + @pytest.mark.unit + def test_value_sharing(self): + # Create first instance of AppState. + x = AppState() + x.test_value = "ala" + # Create second instance of AppState and test value. + y = AppState() + assert y.test_value == "ala" - # Print standard header. - logging.info("-" * 20 + " " + self._testMethodName + " " + "-" * 20) + # Change second instance and test first one. 
+ y.test_value = "ola" + assert x.test_value == "ola" diff --git a/tests/core/test_deprecated.py b/tests/unit/utils/test_deprecated.py similarity index 96% rename from tests/core/test_deprecated.py rename to tests/unit/utils/test_deprecated.py index 45089c7d8b70..4f1c9490e60f 100644 --- a/tests/core/test_deprecated.py +++ b/tests/unit/utils/test_deprecated.py @@ -19,18 +19,21 @@ import re from io import StringIO +from unittest import TestCase from unittest.mock import patch +import pytest + from nemo import logging from nemo.utils.decorators import deprecated -from tests.common_setup import NeMoUnitTest -class DeprecatedTest(NeMoUnitTest): +class DeprecatedTest(TestCase): NEMO_ERR_MSG_FORMAT = re.compile( - r"\[NeMo W [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2} deprecated:[0-9]*\] " + r"\[NeMo W [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2} deprecated:[0-9]+( rank:[0-9]+)?\] " ) + @pytest.mark.unit def test_say_whee_deprecated(self): """ Tests whether both std and err streams return the right values when function is deprecated.""" @@ -58,6 +61,7 @@ def say_whee(): else: raise ValueError("Test case could not find a match, did the format of nemo loggin messages change?") + @pytest.mark.unit def test_say_wow_twice_deprecated(self): """ Tests whether both std and err streams return the right values when a deprecated is called twice.""" @@ -94,6 +98,7 @@ def say_wow(): # Check error output - should be empty. self.assertEqual(std_err.getvalue().strip(), '') + @pytest.mark.unit def test_say_whoopie_deprecated_version(self): """ Tests whether both std and err streams return the right values when function is deprecated and version is provided. """ @@ -123,6 +128,7 @@ def say_whoopie(): else: raise ValueError("Test case could not find a match, did the format of nemo loggin messages change?") + @pytest.mark.unit def test_say_kowabunga_deprecated_explanation(self): """ Tests whether both std and err streams return the right values when function is deprecated and additional explanation is provided. """ diff --git a/tests/unit/utils/test_object_registry.py b/tests/unit/utils/test_object_registry.py new file mode 100644 index 000000000000..0132a593bf6f --- /dev/null +++ b/tests/unit/utils/test_object_registry.py @@ -0,0 +1,67 @@ +# ! /usr/bin/python +# -*- coding: utf-8 -*- +# ============================================================================= +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +import pytest + +from nemo.utils.neural_graph.object_registry import ObjectRegistry + + +class TestAppState: + @pytest.mark.unit + def test_registry(self): + """ Tests registry reference management. """ + # Crete new registry. + registry = ObjectRegistry("object") + + class MockupObjectClass: + def __init__(self, name=None): + # Store name generated by the registry. + self.name = registry.register(self, name) + + # Test object uniqueness. 
+ c1 = MockupObjectClass("c1") + c1_ref = registry["c1"] + assert c1_ref is c1 + + # Test name uniqueness. + c2 = MockupObjectClass("c2") + with pytest.raises(NameError): + _ = MockupObjectClass("c2") + + # Test unique names generation. + c3 = MockupObjectClass() + c4 = MockupObjectClass() + assert c4.name == "mockupobjectclass1" + + # Check objects. + assert len(registry) == 4 + + # Delete all objects - aside from the extra reference still held in c1_ref. + del c1 + del c2 + del c3 + del c4 + assert len(registry) == 1 + # Assert that "c1" is still there, but "c4" is not. + registry["c1"] + with pytest.raises(KeyError): + registry["c4"] + + # Delete the last object. + del c1_ref + assert len(registry) == 0
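Note on running the relocated tests: the rewritten test classes no longer inherit from the removed NeMoUnitTest base class; instead they pull in a `neural_factory` pytest fixture via `@pytest.mark.usefixtures("neural_factory")`. That fixture is not defined anywhere in this diff (it presumably lives in the new tests/conftest.py), so the sketch below is only an illustration of what such a fixture could look like, assuming it recreates the NeuralModuleFactory that the old NeMoUnitTest.setUp() used to instantiate. The fixture name is taken from the tests above; the class scope and the CPU placement are assumptions for illustration, not the repository's actual implementation.

# Hypothetical sketch of tests/conftest.py; not part of this diff.
import pytest

import nemo
from nemo.core import DeviceType


@pytest.fixture(scope="class")
def neural_factory():
    """Instantiate a NeuralModuleFactory once per test class.

    Creating the factory is expected to make it the active default, so data layers
    and neural modules constructed inside the tests pick it up implicitly, which is
    the effect the removed NeMoUnitTest.setUp() achieved. CPU placement is assumed
    here so the unit tests can run on machines without a GPU; the removed setUp()
    used DeviceType.GPU.
    """
    yield nemo.core.NeuralModuleFactory(placement=DeviceType.CPU)

With the `@pytest.mark.unit` marks in place, the suite can then be selected with a standard marker expression, for example `python -m pytest -m unit tests/unit`, provided the `unit` marker is registered in the project's pytest configuration.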