Update container version to 21.05 (#2309)
* Update container version

Signed-off-by: smajumdar <[email protected]>

* Temporarily change export format of waveglow

Signed-off-by: smajumdar <[email protected]>

* Add conda update for numba

Signed-off-by: smajumdar <[email protected]>

* Update numba compatibility checks via the global strictness flag `--relax_numba_compat`, remove pytorch_lightning.metrics usage, refactor numba utils out to core, update tests (a usage sketch follows below)

Signed-off-by: smajumdar <[email protected]>

* Correct order of numba minimum version check, remove wrong flag from test

Signed-off-by: smajumdar <[email protected]>

* Double test of cuda numba

Signed-off-by: smajumdar <[email protected]>

* Double test of cuda numba

Signed-off-by: smajumdar <[email protected]>

* Enable RNNT tests

Signed-off-by: smajumdar <[email protected]>
Signed-off-by: Mike Chrzanowski <[email protected]>
titu1994 authored and Mike Chrzanowski committed Jun 23, 2021
1 parent 6f077fb commit 417ed17
Showing 21 changed files with 168 additions and 73 deletions.
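
For orientation, here is a minimal sketch of how the new strictness toggle is driven from the environment. It is not code from this commit; all names come from the nemo/core/utils/numba_utils.py diff below, and the environment variable must be set before NeMo is imported because that module reads it at import time.

import os

# Mirror the `STRICT_NUMBA_COMPAT_CHECK=false python examples/asr/speech_to_text_rnnt.py ...`
# invocations added to the Jenkinsfile: the variable is read once, at import time,
# by nemo.core.utils.numba_utils.
os.environ["STRICT_NUMBA_COMPAT_CHECK"] = "false"

from nemo.core.utils import numba_utils
from nemo.core.utils.numba_utils import __NUMBA_MINIMUM_VERSION__

# With strictness relaxed, only CUDA availability is checked, not the
# numba / CUDA toolkit compatibility matrix.
print(numba_utils.is_numba_compat_strict())                             # False
print(numba_utils.numba_cuda_is_supported(__NUMBA_MINIMUM_VERSION__))   # True iff CUDA is available
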
2 changes: 1 addition & 1 deletion Dockerfile
@@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:21.03-py3
ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:21.05-py3


# build an image that includes only the nemo dependencies, ensures that dependencies
88 changes: 46 additions & 42 deletions Jenkinsfile
@@ -1,7 +1,7 @@
pipeline {
agent {
docker {
image 'nvcr.io/nvidia/pytorch:21.03-py3'
image 'nvcr.io/nvidia/pytorch:21.05-py3'
args '--device=/dev/nvidia0 --gpus all --user 0:128 -v /home/TestData:/home/TestData -v $HOME/.cache/torch:/root/.cache/torch --shm-size=8g'
}
}
@@ -66,7 +66,7 @@ pipeline {

stage('L0: Unit Tests GPU') {
steps {
sh 'pytest -m "not pleasefixme" --with_downloads'
sh 'pytest -m "not pleasefixme" --with_downloads --relax_numba_compat'
}
}

@@ -78,7 +78,7 @@
}
}
steps {
sh 'CUDA_VISIBLE_DEVICES="" pytest -m "not pleasefixme" --cpu --with_downloads'
sh 'CUDA_VISIBLE_DEVICES="" pytest -m "not pleasefixme" --cpu --with_downloads --relax_numba_compat'
}
}

@@ -288,8 +288,8 @@ pipeline {
model.validation_ds.manifest_filepath=/home/TestData/an4_dataset/an4_val.json \
model.tokenizer.dir="/home/TestData/asr_tokenizers/an4_wpe_128/" \
model.tokenizer.type="wpe" \
model.train_ds.batch_size=10 \
model.validation_ds.batch_size=10 \
model.train_ds.batch_size=4 \
model.validation_ds.batch_size=4 \
trainer.gpus=[1] \
+trainer.fast_dev_run=True \
exp_manager.exp_dir=examples/asr/speech_to_text_wpe_conformer_results'
@@ -348,43 +348,47 @@ pipeline {
}

// TODO: UNCOMMENT TESTS AFTER 21.04 release (numba 0.53 min requirement)
// stage('L2: ASR RNNT dev run') {
// when {
// anyOf {
// branch 'main'
// changeRequest target: 'main'
// }
// }
// failFast true
// parallel {
// stage('Speech to Text - RNNT') {
// steps {
// sh 'python examples/asr/speech_to_text_rnnt.py \
// model.train_ds.manifest_filepath=/home/TestData/an4_dataset/an4_train.json \
// model.validation_ds.manifest_filepath=/home/TestData/an4_dataset/an4_val.json \
// model.train_ds.batch_size=8 \
// trainer.gpus=[0] \
// +trainer.fast_dev_run=True \
// exp_manager.exp_dir=examples/asr/speech_to_text_rnnt_results'
// sh 'rm -rf examples/asr/speech_to_text_rnnt_results'
// }
// }
// stage('L2: Speech to Text RNNT WPE') {
// steps {
// sh 'python examples/asr/speech_to_text_rnnt_bpe.py \
// --config-path="experimental/contextnet_rnnt/" --config-name="config_rnnt_bpe.yaml" \
// model.train_ds.manifest_filepath=/home/TestData/an4_dataset/an4_train.json \
// model.validation_ds.manifest_filepath=/home/TestData/an4_dataset/an4_val.json \
// model.tokenizer.dir="/home/TestData/asr_tokenizers/an4_wpe_128/" \
// model.tokenizer.type="wpe" \
// trainer.gpus=[0] \
// +trainer.fast_dev_run=True \
// exp_manager.exp_dir=examples/asr/speech_to_text_rnnt_wpe_results'
// sh 'rm -rf examples/asr/speech_to_text_rnnt_wpe_results'
// }
// }
// }
// }
stage('L2: ASR RNNT dev run') {
when {
anyOf {
branch 'main'
changeRequest target: 'main'
}
}
failFast true
parallel {
stage('Speech to Text - RNNT') {
steps {
sh 'STRICT_NUMBA_COMPAT_CHECK=false python examples/asr/speech_to_text_rnnt.py \
--config-path="experimental/contextnet_rnnt/" --config-name="config_rnnt.yaml" \
model.train_ds.manifest_filepath=/home/TestData/an4_dataset/an4_train.json \
model.validation_ds.manifest_filepath=/home/TestData/an4_dataset/an4_val.json \
model.train_ds.batch_size=2 \
model.validation_ds.batch_size=2 \
trainer.gpus=[0] \
+trainer.fast_dev_run=True \
exp_manager.exp_dir=examples/asr/speech_to_text_rnnt_results'
sh 'rm -rf examples/asr/speech_to_text_rnnt_results'
}
}
stage('L2: Speech to Text RNNT WPE') {
steps {
sh 'STRICT_NUMBA_COMPAT_CHECK=false python examples/asr/speech_to_text_rnnt_bpe.py \
--config-path="experimental/contextnet_rnnt/" --config-name="config_rnnt_bpe.yaml" \
model.train_ds.manifest_filepath=/home/TestData/an4_dataset/an4_train.json \
model.validation_ds.manifest_filepath=/home/TestData/an4_dataset/an4_val.json \
model.train_ds.batch_size=2 \
model.validation_ds.batch_size=2 \
model.tokenizer.dir="/home/TestData/asr_tokenizers/an4_wpe_128/" \
model.tokenizer.type="wpe" \
trainer.gpus=[0] \
+trainer.fast_dev_run=True \
exp_manager.exp_dir=examples/asr/speech_to_text_rnnt_wpe_results'
sh 'rm -rf examples/asr/speech_to_text_rnnt_wpe_results'
}
}
}
}

stage('L2: ASR Multi-dataloader dev run') {
when {
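
The `--relax_numba_compat` flag passed to pytest in the unit-test stages above is consumed outside this diff. A hypothetical conftest.py wiring, assuming the standard pytest hooks and the setter added in nemo/core/utils/numba_utils.py below, could look like this sketch; it is not the repository's actual implementation.

# Hypothetical tests/conftest.py sketch -- not part of this diff.
from nemo.core.utils import numba_utils


def pytest_addoption(parser):
    # Flag name taken from the Jenkinsfile; default and help text are assumptions.
    parser.addoption(
        "--relax_numba_compat",
        action="store_true",
        default=False,
        help="Only check CUDA availability, not the numba/CUDA compatibility matrix.",
    )


def pytest_configure(config):
    if config.getoption("--relax_numba_compat"):
        # Relax the global strictness level before the test modules run.
        numba_utils.set_numba_compat_strictness(strict=False)
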
4 changes: 2 additions & 2 deletions README.rst
@@ -150,13 +150,13 @@ Use this installation mode if you are contributing to NeMo.
Docker containers:
~~~~~~~~~~~~~~~~~~

If you chose to work with main branch, we recommend using NVIDIA's PyTorch container version 21.03-py3 and then installing from GitHub.
If you chose to work with main branch, we recommend using NVIDIA's PyTorch container version 21.05-py3 and then installing from GitHub.

.. code-block:: bash
docker run --gpus all -it --rm -v <nemo_github_folder>:/NeMo --shm-size=8g \
-p 8888:8888 -p 6006:6006 --ulimit memlock=-1 --ulimit \
stack=67108864 --device=/dev/snd nvcr.io/nvidia/pytorch:21.03-py3
stack=67108864 --device=/dev/snd nvcr.io/nvidia/pytorch:21.05-py3
Examples
--------
4 changes: 2 additions & 2 deletions docs/source/starthere/intro.rst
@@ -132,13 +132,13 @@ Use this installation mode if you are contributing to NeMo.
Docker containers
~~~~~~~~~~~~~~~~~
If you chose to work with the ``main`` branch, we recommend using `NVIDIA's PyTorch container version 21.03-py3 <https://ngc.nvidia.com/containers/nvidia:pytorch/tags>`_, then install from GitHub.
If you chose to work with the ``main`` branch, we recommend using `NVIDIA's PyTorch container version 21.05-py3 <https://ngc.nvidia.com/containers/nvidia:pytorch/tags>`_, then install from GitHub.

.. code-block:: bash
docker run --gpus all -it --rm -v <nemo_github_folder>:/NeMo --shm-size=8g \
-p 8888:8888 -p 6006:6006 --ulimit memlock=-1 --ulimit \
stack=67108864 --device=/dev/snd nvcr.io/nvidia/pytorch:21.03-py3
stack=67108864 --device=/dev/snd nvcr.io/nvidia/pytorch:21.05-py3
FAQ
2 changes: 1 addition & 1 deletion nemo/collections/asr/losses/rnnt.py
@@ -33,9 +33,9 @@

import torch

from nemo.collections.asr.parts.numba.numba_utils import NUMBA_INSTALLATION_MESSAGE
from nemo.core.classes import Loss, typecheck
from nemo.core.neural_types import LabelsType, LengthsType, LogprobsType, LossType, NeuralType
from nemo.core.utils.numba_utils import NUMBA_INSTALLATION_MESSAGE
from nemo.utils import logging, model_utils

try:
2 changes: 1 addition & 1 deletion nemo/collections/asr/models/classification_models.py
@@ -23,7 +23,7 @@
import torch
from omegaconf import DictConfig, ListConfig, OmegaConf
from pytorch_lightning import Trainer
from pytorch_lightning.metrics.regression import MeanAbsoluteError, MeanSquaredError
from torchmetrics.regression import MeanAbsoluteError, MeanSquaredError

from nemo.collections.asr.data import audio_to_label_dataset
from nemo.collections.asr.models.asr_model import ASRModel, ExportableEncDecModel
3 changes: 2 additions & 1 deletion nemo/collections/asr/modules/audio_preprocessing.py
@@ -20,7 +20,6 @@
import torch
from packaging import version

from nemo.collections.asr.parts.numba import __NUMBA_MINIMUM_VERSION__, numba_utils
from nemo.collections.asr.parts.numba.spec_augment import SpecAugmentNumba, spec_augment_launch_heuristics
from nemo.collections.asr.parts.preprocessing.features import FilterbankFeatures
from nemo.collections.asr.parts.submodules.spectr_augment import SpecAugment, SpecCutout
@@ -33,6 +32,8 @@
NeuralType,
SpectrogramType,
)
from nemo.core.utils import numba_utils
from nemo.core.utils.numba_utils import __NUMBA_MINIMUM_VERSION__
from nemo.utils import logging

try:
13 changes: 0 additions & 13 deletions nemo/collections/asr/parts/numba/__init__.py
@@ -12,17 +12,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import logging

from nemo.collections.asr.parts.numba.numba_utils import (
NUMBA_INSTALLATION_MESSAGE,
numba_cuda_is_supported,
skip_numba_cuda_test_if_unsupported,
)
from nemo.collections.asr.parts.numba.rnnt_loss.rnnt_pytorch import RNNTLossNumba

# Prevent Numba CUDA logs from showing at info level
cuda_logger = logging.getLogger('numba.cuda.cudadrv.driver')
cuda_logger.setLevel(logging.ERROR) # only show error

__NUMBA_MINIMUM_VERSION__ = "0.53.0"
15 changes: 15 additions & 0 deletions nemo/core/utils/__init__.py
@@ -0,0 +1,15 @@
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from nemo.core.utils import numba_utils
nemo/collections/asr/parts/numba/numba_utils.py → nemo/core/utils/numba_utils.py
@@ -12,10 +12,20 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import contextlib
import logging as pylogger
import operator
import os

from nemo.utils import model_utils

# Prevent Numba CUDA logs from showing at info level
cuda_logger = pylogger.getLogger('numba.cuda.cudadrv.driver')
cuda_logger.setLevel(pylogger.ERROR) # only show error

__NUMBA_MINIMUM_VERSION__ = "0.53.0"


NUMBA_INSTALLATION_MESSAGE = (
"Could not import `numba`.\n"
"Please install numba in one of the following ways."
@@ -29,6 +39,48 @@
"but this is not advised."
)

STRICT_NUMBA_COMPAT_CHECK = True

# Get environment key if available
if 'STRICT_NUMBA_COMPAT_CHECK' in os.environ:
check_str = os.environ.get('STRICT_NUMBA_COMPAT_CHECK')
check_bool = str(check_str).lower() in ("yes", "true", "t", "1")
STRICT_NUMBA_COMPAT_CHECK = check_bool


def is_numba_compat_strict() -> bool:
"""
Returns strictness level of numba cuda compatibility checks.
If value is true, numba cuda compatibility matrix must be satisfied.
If value is false, only cuda availability is checked, not compatibility.
Numba Cuda may still compile and run without issues in such a case, or it may fail.
"""
return STRICT_NUMBA_COMPAT_CHECK


def set_numba_compat_strictness(strict: bool):
"""
Sets the strictness level of numba cuda compatibility checks.
If value is true, numba cuda compatibility matrix must be satisfied.
If value is false, only cuda availability is checked, not compatibility.
Numba Cuda may still compile and run without issues in such a case, or it may fail.
Args:
strict: bool value, whether to enforce strict compatibility checks or relax them.
"""
global STRICT_NUMBA_COMPAT_CHECK
STRICT_NUMBA_COMPAT_CHECK = strict


@contextlib.contextmanager
def with_numba_compat_strictness(strict: bool):
initial_strictness = is_numba_compat_strict()
set_numba_compat_strictness(strict=strict)
yield
set_numba_compat_strictness(strict=initial_strictness)


def numba_cuda_is_supported(min_version: str) -> bool:
"""
@@ -54,7 +106,17 @@ def numba_cuda_is_supported(min_version: str) -> bool:
# this method first arrived in 0.53, and that's the minimum version required
if hasattr(cuda, 'is_supported_version'):
try:
return cuda.is_available() and cuda.is_supported_version()
cuda_available = cuda.is_available()
if cuda_available:
cuda_compatible = cuda.is_supported_version()
else:
cuda_compatible = False

if is_numba_compat_strict():
return cuda_available and cuda_compatible
else:
return cuda_available

except OSError:
# dlopen(libcudart.dylib) might fail if CUDA was never installed in the first place.
return False
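
Taken together, the helpers above let callers inspect, set, or temporarily override the strictness level. A short usage sketch, using only functions defined in this file (the surrounding script is assumed):

from nemo.core.utils import numba_utils
from nemo.core.utils.numba_utils import __NUMBA_MINIMUM_VERSION__

# Default behaviour: strict check against the numba/CUDA compatibility matrix.
strict_ok = numba_utils.numba_cuda_is_supported(__NUMBA_MINIMUM_VERSION__)

# Temporarily relax the check: inside the block only CUDA availability is
# verified, and the previous strictness level is restored on exit.
with numba_utils.with_numba_compat_strictness(strict=False):
    relaxed_ok = numba_utils.numba_cuda_is_supported(__NUMBA_MINIMUM_VERSION__)

print(strict_ok, relaxed_ok)
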
5 changes: 5 additions & 0 deletions reinstall.sh
@@ -32,4 +32,9 @@ fi
echo 'Installing additional nemo_text_processing conda dependency'
bash nemo_text_processing/setup.sh > /dev/null 2>&1 && echo "nemo_text_processing installed!" || echo "nemo_text_processing could not be installed!"

if [ -x "$(command -v conda)" ]; then
echo 'Attempting update to numba installation via conda'
conda update -c numba numba -y > /dev/null 2>&1 && echo "Numba updated!" || echo "Numba could not be updated!"
fi

echo 'All done!'
3 changes: 2 additions & 1 deletion tests/collections/asr/numba/rnnt_loss/test_rnnt_pytorch.py
@@ -16,9 +16,10 @@
import pytest
import torch

from nemo.collections.asr.parts.numba import __NUMBA_MINIMUM_VERSION__, numba_utils
from nemo.collections.asr.parts.numba.rnnt_loss.rnnt_numpy import RNNTLoss as RNNTLoss_Numpy
from nemo.collections.asr.parts.numba.rnnt_loss.rnnt_pytorch import RNNTLossNumba
from nemo.core.utils import numba_utils
from nemo.core.utils.numba_utils import __NUMBA_MINIMUM_VERSION__

DEVICES = ['cpu']

@@ -17,10 +17,11 @@
import torch
from numba import cuda

from nemo.collections.asr.parts.numba import __NUMBA_MINIMUM_VERSION__, numba_utils
from nemo.collections.asr.parts.numba.rnnt_loss import rnnt_numpy
from nemo.collections.asr.parts.numba.rnnt_loss.rnnt_pytorch import certify_inputs
from nemo.collections.asr.parts.numba.rnnt_loss.utils.cuda_utils import gpu_rnnt_kernel, reduce
from nemo.core.utils import numba_utils
from nemo.core.utils.numba_utils import __NUMBA_MINIMUM_VERSION__


def log_softmax(x, axis=-1):
3 changes: 2 additions & 1 deletion tests/collections/asr/numba/rnnt_loss/utils/test_reduce.py
@@ -16,8 +16,9 @@
import pytest
from numba import cuda

from nemo.collections.asr.parts.numba import __NUMBA_MINIMUM_VERSION__, numba_utils
from nemo.collections.asr.parts.numba.rnnt_loss.utils.cuda_utils import reduce
from nemo.core.utils import numba_utils
from nemo.core.utils.numba_utils import __NUMBA_MINIMUM_VERSION__


class TestRNNTCUDAReductions:
@@ -16,8 +16,9 @@
import pytest
from numba import cuda

from nemo.collections.asr.parts.numba import __NUMBA_MINIMUM_VERSION__, numba_utils
from nemo.collections.asr.parts.numba.rnnt_loss.utils import global_constants, rnnt_helper
from nemo.core.utils import numba_utils
from nemo.core.utils.numba_utils import __NUMBA_MINIMUM_VERSION__


class TestRNNTHelper:
@@ -16,8 +16,9 @@
import torch
from omegaconf import OmegaConf

from nemo.collections.asr.parts.numba import __NUMBA_MINIMUM_VERSION__, numba_utils
from nemo.collections.asr.parts.numba.spec_augment import spec_aug_numba
from nemo.core.utils import numba_utils
from nemo.core.utils.numba_utils import __NUMBA_MINIMUM_VERSION__


def get_cfg(seed=0, dtype='float32'):
(Diffs for the remaining changed files not shown.)