Update container version to 21.05 (#2309)
* Update container version

Signed-off-by: smajumdar <[email protected]>

* Temporarily change export format of waveglow

Signed-off-by: smajumdar <[email protected]>

* Add conda update for numba

Signed-off-by: smajumdar <[email protected]>

* Update numba compatibility checks via the global strictness flag `--relax_numba_compat`, remove pytorch_lightning.metrics usage, refactor numba utils out to core, update tests (a usage sketch follows below)

Signed-off-by: smajumdar <[email protected]>

* Correct order of numba minimum version check, remove wrong flag from test

Signed-off-by: smajumdar <[email protected]>

* Double test of cuda numba

Signed-off-by: smajumdar <[email protected]>

* Double test of cuda numba

Signed-off-by: smajumdar <[email protected]>

* Enable RNNT tests

Signed-off-by: smajumdar <[email protected]>
Signed-off-by: Mike Chrzanowski <[email protected]>
titu1994 authored and Mike Chrzanowski committed Jun 23, 2021
1 parent 6f077fb commit 417ed17
Showing 21 changed files with 168 additions and 73 deletions.
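
For orientation, here is a minimal sketch of how the new strictness toggle is driven from the environment. It is not code from this commit; all names come from the nemo/core/utils/numba_utils.py diff below, and the environment variable must be set before NeMo is imported because that module reads it at import time.

import os

# Mirror the `STRICT_NUMBA_COMPAT_CHECK=false python examples/asr/speech_to_text_rnnt.py ...`
# invocations added to the Jenkinsfile: the variable is read once, at import time,
# by nemo.core.utils.numba_utils.
os.environ["STRICT_NUMBA_COMPAT_CHECK"] = "false"

from nemo.core.utils import numba_utils
from nemo.core.utils.numba_utils import __NUMBA_MINIMUM_VERSION__

# With strictness relaxed, only CUDA availability is checked, not the
# numba / CUDA toolkit compatibility matrix.
print(numba_utils.is_numba_compat_strict())                             # False
print(numba_utils.numba_cuda_is_supported(__NUMBA_MINIMUM_VERSION__))   # True iff CUDA is available
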
2 changes: 1 addition & 1 deletion Dockerfile
@@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:21.03-py3
ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:21.05-py3


# build an image that includes only the nemo dependencies, ensures that dependencies
88 changes: 46 additions & 42 deletions Jenkinsfile
@@ -1,7 +1,7 @@
pipeline {
agent {
docker {
image 'nvcr.io/nvidia/pytorch:21.03-py3'
image 'nvcr.io/nvidia/pytorch:21.05-py3'
args '--device=/dev/nvidia0 --gpus all --user 0:128 -v /home/TestData:/home/TestData -v $HOME/.cache/torch:/root/.cache/torch --shm-size=8g'
}
}
@@ -66,7 +66,7 @@ pipeline {

stage('L0: Unit Tests GPU') {
steps {
sh 'pytest -m "not pleasefixme" --with_downloads'
sh 'pytest -m "not pleasefixme" --with_downloads --relax_numba_compat'
}
}

@@ -78,7 +78,7 @@
}
}
steps {
sh 'CUDA_VISIBLE_DEVICES="" pytest -m "not pleasefixme" --cpu --with_downloads'
sh 'CUDA_VISIBLE_DEVICES="" pytest -m "not pleasefixme" --cpu --with_downloads --relax_numba_compat'
}
}

@@ -288,8 +288,8 @@ pipeline {
model.validation_ds.manifest_filepath=/home/TestData/an4_dataset/an4_val.json \
model.tokenizer.dir="/home/TestData/asr_tokenizers/an4_wpe_128/" \
model.tokenizer.type="wpe" \
model.train_ds.batch_size=10 \
model.validation_ds.batch_size=10 \
model.train_ds.batch_size=4 \
model.validation_ds.batch_size=4 \
trainer.gpus=[1] \
+trainer.fast_dev_run=True \
exp_manager.exp_dir=examples/asr/speech_to_text_wpe_conformer_results'
@@ -348,43 +348,47 @@ pipeline {
}

// TODO: UNCOMMENT TESTS AFTER 21.04 release (numba 0.53 min requirement)
// stage('L2: ASR RNNT dev run') {
// when {
// anyOf {
// branch 'main'
// changeRequest target: 'main'
// }
// }
// failFast true
// parallel {
// stage('Speech to Text - RNNT') {
// steps {
// sh 'python examples/asr/speech_to_text_rnnt.py \
// model.train_ds.manifest_filepath=/home/TestData/an4_dataset/an4_train.json \
// model.validation_ds.manifest_filepath=/home/TestData/an4_dataset/an4_val.json \
// model.train_ds.batch_size=8 \
// trainer.gpus=[0] \
// +trainer.fast_dev_run=True \
// exp_manager.exp_dir=examples/asr/speech_to_text_rnnt_results'
// sh 'rm -rf examples/asr/speech_to_text_rnnt_results'
// }
// }
// stage('L2: Speech to Text RNNT WPE') {
// steps {
// sh 'python examples/asr/speech_to_text_rnnt_bpe.py \
// --config-path="experimental/contextnet_rnnt/" --config-name="config_rnnt_bpe.yaml" \
// model.train_ds.manifest_filepath=/home/TestData/an4_dataset/an4_train.json \
// model.validation_ds.manifest_filepath=/home/TestData/an4_dataset/an4_val.json \
// model.tokenizer.dir="/home/TestData/asr_tokenizers/an4_wpe_128/" \
// model.tokenizer.type="wpe" \
// trainer.gpus=[0] \
// +trainer.fast_dev_run=True \
// exp_manager.exp_dir=examples/asr/speech_to_text_rnnt_wpe_results'
// sh 'rm -rf examples/asr/speech_to_text_rnnt_wpe_results'
// }
// }
// }
// }
stage('L2: ASR RNNT dev run') {
when {
anyOf {
branch 'main'
changeRequest target: 'main'
}
}
failFast true
parallel {
stage('Speech to Text - RNNT') {
steps {
sh 'STRICT_NUMBA_COMPAT_CHECK=false python examples/asr/speech_to_text_rnnt.py \
--config-path="experimental/contextnet_rnnt/" --config-name="config_rnnt.yaml" \
model.train_ds.manifest_filepath=/home/TestData/an4_dataset/an4_train.json \
model.validation_ds.manifest_filepath=/home/TestData/an4_dataset/an4_val.json \
model.train_ds.batch_size=2 \
model.validation_ds.batch_size=2 \
trainer.gpus=[0] \
+trainer.fast_dev_run=True \
exp_manager.exp_dir=examples/asr/speech_to_text_rnnt_results'
sh 'rm -rf examples/asr/speech_to_text_rnnt_results'
}
}
stage('L2: Speech to Text RNNT WPE') {
steps {
sh 'STRICT_NUMBA_COMPAT_CHECK=false python examples/asr/speech_to_text_rnnt_bpe.py \
--config-path="experimental/contextnet_rnnt/" --config-name="config_rnnt_bpe.yaml" \
model.train_ds.manifest_filepath=/home/TestData/an4_dataset/an4_train.json \
model.validation_ds.manifest_filepath=/home/TestData/an4_dataset/an4_val.json \
model.train_ds.batch_size=2 \
model.validation_ds.batch_size=2 \
model.tokenizer.dir="/home/TestData/asr_tokenizers/an4_wpe_128/" \
model.tokenizer.type="wpe" \
trainer.gpus=[0] \
+trainer.fast_dev_run=True \
exp_manager.exp_dir=examples/asr/speech_to_text_rnnt_wpe_results'
sh 'rm -rf examples/asr/speech_to_text_rnnt_wpe_results'
}
}
}
}

stage('L2: ASR Multi-dataloader dev run') {
when {
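
The `--relax_numba_compat` flag passed to pytest in the unit-test stages above is consumed outside this diff. A hypothetical conftest.py wiring, assuming the standard pytest hooks and the setter added in nemo/core/utils/numba_utils.py below, could look like this sketch; it is not the repository's actual implementation.

# Hypothetical tests/conftest.py sketch -- not part of this diff.
from nemo.core.utils import numba_utils


def pytest_addoption(parser):
    # Flag name taken from the Jenkinsfile; default and help text are assumptions.
    parser.addoption(
        "--relax_numba_compat",
        action="store_true",
        default=False,
        help="Only check CUDA availability, not the numba/CUDA compatibility matrix.",
    )


def pytest_configure(config):
    if config.getoption("--relax_numba_compat"):
        # Relax the global strictness level before the test modules run.
        numba_utils.set_numba_compat_strictness(strict=False)
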
4 changes: 2 additions & 2 deletions README.rst
@@ -150,13 +150,13 @@ Use this installation mode if you are contributing to NeMo.
Docker containers:
~~~~~~~~~~~~~~~~~~

If you chose to work with main branch, we recommend using NVIDIA's PyTorch container version 21.03-py3 and then installing from GitHub.
If you chose to work with main branch, we recommend using NVIDIA's PyTorch container version 21.05-py3 and then installing from GitHub.

.. code-block:: bash
docker run --gpus all -it --rm -v <nemo_github_folder>:/NeMo --shm-size=8g \
-p 8888:8888 -p 6006:6006 --ulimit memlock=-1 --ulimit \
stack=67108864 --device=/dev/snd nvcr.io/nvidia/pytorch:21.03-py3
stack=67108864 --device=/dev/snd nvcr.io/nvidia/pytorch:21.05-py3
Examples
--------
4 changes: 2 additions & 2 deletions docs/source/starthere/intro.rst
@@ -132,13 +132,13 @@ Use this installation mode if you are contributing to NeMo.
Docker containers
~~~~~~~~~~~~~~~~~
If you chose to work with the ``main`` branch, we recommend using `NVIDIA's PyTorch container version 21.03-py3 <https://ngc.nvidia.com/containers/nvidia:pytorch/tags>`_, then install from GitHub.
If you chose to work with the ``main`` branch, we recommend using `NVIDIA's PyTorch container version 21.05-py3 <https://ngc.nvidia.com/containers/nvidia:pytorch/tags>`_, then install from GitHub.

.. code-block:: bash
docker run --gpus all -it --rm -v <nemo_github_folder>:/NeMo --shm-size=8g \
-p 8888:8888 -p 6006:6006 --ulimit memlock=-1 --ulimit \
stack=67108864 --device=/dev/snd nvcr.io/nvidia/pytorch:21.03-py3
stack=67108864 --device=/dev/snd nvcr.io/nvidia/pytorch:21.05-py3
FAQ
2 changes: 1 addition & 1 deletion nemo/collections/asr/losses/rnnt.py
@@ -33,9 +33,9 @@

import torch

from nemo.collections.asr.parts.numba.numba_utils import NUMBA_INSTALLATION_MESSAGE
from nemo.core.classes import Loss, typecheck
from nemo.core.neural_types import LabelsType, LengthsType, LogprobsType, LossType, NeuralType
from nemo.core.utils.numba_utils import NUMBA_INSTALLATION_MESSAGE
from nemo.utils import logging, model_utils

try:
2 changes: 1 addition & 1 deletion nemo/collections/asr/models/classification_models.py
@@ -23,7 +23,7 @@
import torch
from omegaconf import DictConfig, ListConfig, OmegaConf
from pytorch_lightning import Trainer
from pytorch_lightning.metrics.regression import MeanAbsoluteError, MeanSquaredError
from torchmetrics.regression import MeanAbsoluteError, MeanSquaredError

from nemo.collections.asr.data import audio_to_label_dataset
from nemo.collections.asr.models.asr_model import ASRModel, ExportableEncDecModel
3 changes: 2 additions & 1 deletion nemo/collections/asr/modules/audio_preprocessing.py
@@ -20,7 +20,6 @@
import torch
from packaging import version

from nemo.collections.asr.parts.numba import __NUMBA_MINIMUM_VERSION__, numba_utils
from nemo.collections.asr.parts.numba.spec_augment import SpecAugmentNumba, spec_augment_launch_heuristics
from nemo.collections.asr.parts.preprocessing.features import FilterbankFeatures
from nemo.collections.asr.parts.submodules.spectr_augment import SpecAugment, SpecCutout
@@ -33,6 +32,8 @@
NeuralType,
SpectrogramType,
)
from nemo.core.utils import numba_utils
from nemo.core.utils.numba_utils import __NUMBA_MINIMUM_VERSION__
from nemo.utils import logging

try:
13 changes: 0 additions & 13 deletions nemo/collections/asr/parts/numba/__init__.py
@@ -12,17 +12,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import logging

from nemo.collections.asr.parts.numba.numba_utils import (
NUMBA_INSTALLATION_MESSAGE,
numba_cuda_is_supported,
skip_numba_cuda_test_if_unsupported,
)
from nemo.collections.asr.parts.numba.rnnt_loss.rnnt_pytorch import RNNTLossNumba

# Prevent Numba CUDA logs from showing at info level
cuda_logger = logging.getLogger('numba.cuda.cudadrv.driver')
cuda_logger.setLevel(logging.ERROR) # only show error

__NUMBA_MINIMUM_VERSION__ = "0.53.0"
15 changes: 15 additions & 0 deletions nemo/core/utils/__init__.py
@@ -0,0 +1,15 @@
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from nemo.core.utils import numba_utils
nemo/collections/asr/parts/numba/numba_utils.py → nemo/core/utils/numba_utils.py
@@ -12,10 +12,20 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import contextlib
import logging as pylogger
import operator
import os

from nemo.utils import model_utils

# Prevent Numba CUDA logs from showing at info level
cuda_logger = pylogger.getLogger('numba.cuda.cudadrv.driver')
cuda_logger.setLevel(pylogger.ERROR) # only show error

__NUMBA_MINIMUM_VERSION__ = "0.53.0"


NUMBA_INSTALLATION_MESSAGE = (
"Could not import `numba`.\n"
"Please install numba in one of the following ways."
@@ -29,6 +39,48 @@
"but this is not advised."
)

STRICT_NUMBA_COMPAT_CHECK = True

# Get environment key if available
if 'STRICT_NUMBA_COMPAT_CHECK' in os.environ:
check_str = os.environ.get('STRICT_NUMBA_COMPAT_CHECK')
check_bool = str(check_str).lower() in ("yes", "true", "t", "1")
STRICT_NUMBA_COMPAT_CHECK = check_bool


def is_numba_compat_strict() -> bool:
"""
Returns strictness level of numba cuda compatibility checks.
If value is true, numba cuda compatibility matrix must be satisfied.
If value is false, only cuda availability is checked, not compatibility.
Numba Cuda may still compile and run without issues in such a case, or it may fail.
"""
return STRICT_NUMBA_COMPAT_CHECK


def set_numba_compat_strictness(strict: bool):
"""
Sets the strictness level of numba cuda compatibility checks.
If value is true, numba cuda compatibility matrix must be satisfied.
If value is false, only cuda availability is checked, not compatibility.
Numba Cuda may still compile and run without issues in such a case, or it may fail.
Args:
strict: bool value, whether to enforce strict compatibility checks or relax them.
"""
global STRICT_NUMBA_COMPAT_CHECK
STRICT_NUMBA_COMPAT_CHECK = strict


@contextlib.contextmanager
def with_numba_compat_strictness(strict: bool):
initial_strictness = is_numba_compat_strict()
set_numba_compat_strictness(strict=strict)
yield
set_numba_compat_strictness(strict=initial_strictness)


def numba_cuda_is_supported(min_version: str) -> bool:
"""
@@ -54,7 +106,17 @@ def numba_cuda_is_supported(min_version: str) -> bool:
# this method first arrived in 0.53, and that's the minimum version required
if hasattr(cuda, 'is_supported_version'):
try:
return cuda.is_available() and cuda.is_supported_version()
cuda_available = cuda.is_available()
if cuda_available:
cuda_compatible = cuda.is_supported_version()
else:
cuda_compatible = False

if is_numba_compat_strict():
return cuda_available and cuda_compatible
else:
return cuda_available

except OSError:
# dlopen(libcudart.dylib) might fail if CUDA was never installed in the first place.
return False
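
Taken together, the helpers above let callers inspect, set, or temporarily override the strictness level. A short usage sketch, using only functions defined in this file (the surrounding script is assumed):

from nemo.core.utils import numba_utils
from nemo.core.utils.numba_utils import __NUMBA_MINIMUM_VERSION__

# Default behaviour: strict check against the numba/CUDA compatibility matrix.
strict_ok = numba_utils.numba_cuda_is_supported(__NUMBA_MINIMUM_VERSION__)

# Temporarily relax the check: inside the block only CUDA availability is
# verified, and the previous strictness level is restored on exit.
with numba_utils.with_numba_compat_strictness(strict=False):
    relaxed_ok = numba_utils.numba_cuda_is_supported(__NUMBA_MINIMUM_VERSION__)

print(strict_ok, relaxed_ok)
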
5 changes: 5 additions & 0 deletions reinstall.sh
@@ -32,4 +32,9 @@ fi
echo 'Installing additional nemo_text_processing conda dependency'
bash nemo_text_processing/setup.sh > /dev/null 2>&1 && echo "nemo_text_processing installed!" || echo "nemo_text_processing could not be installed!"

if [ -x "$(command -v conda)" ]; then
echo 'Attempting update to numba installation via conda'
conda update -c numba numba -y > /dev/null 2>&1 && echo "Numba updated!" || echo "Numba could not be updated!"
fi

echo 'All done!'
3 changes: 2 additions & 1 deletion tests/collections/asr/numba/rnnt_loss/test_rnnt_pytorch.py
@@ -16,9 +16,10 @@
import pytest
import torch

from nemo.collections.asr.parts.numba import __NUMBA_MINIMUM_VERSION__, numba_utils
from nemo.collections.asr.parts.numba.rnnt_loss.rnnt_numpy import RNNTLoss as RNNTLoss_Numpy
from nemo.collections.asr.parts.numba.rnnt_loss.rnnt_pytorch import RNNTLossNumba
from nemo.core.utils import numba_utils
from nemo.core.utils.numba_utils import __NUMBA_MINIMUM_VERSION__

DEVICES = ['cpu']

@@ -17,10 +17,11 @@
import torch
from numba import cuda

from nemo.collections.asr.parts.numba import __NUMBA_MINIMUM_VERSION__, numba_utils
from nemo.collections.asr.parts.numba.rnnt_loss import rnnt_numpy
from nemo.collections.asr.parts.numba.rnnt_loss.rnnt_pytorch import certify_inputs
from nemo.collections.asr.parts.numba.rnnt_loss.utils.cuda_utils import gpu_rnnt_kernel, reduce
from nemo.core.utils import numba_utils
from nemo.core.utils.numba_utils import __NUMBA_MINIMUM_VERSION__


def log_softmax(x, axis=-1):
3 changes: 2 additions & 1 deletion tests/collections/asr/numba/rnnt_loss/utils/test_reduce.py
@@ -16,8 +16,9 @@
import pytest
from numba import cuda

from nemo.collections.asr.parts.numba import __NUMBA_MINIMUM_VERSION__, numba_utils
from nemo.collections.asr.parts.numba.rnnt_loss.utils.cuda_utils import reduce
from nemo.core.utils import numba_utils
from nemo.core.utils.numba_utils import __NUMBA_MINIMUM_VERSION__


class TestRNNTCUDAReductions:
@@ -16,8 +16,9 @@
import pytest
from numba import cuda

from nemo.collections.asr.parts.numba import __NUMBA_MINIMUM_VERSION__, numba_utils
from nemo.collections.asr.parts.numba.rnnt_loss.utils import global_constants, rnnt_helper
from nemo.core.utils import numba_utils
from nemo.core.utils.numba_utils import __NUMBA_MINIMUM_VERSION__


class TestRNNTHelper:
@@ -16,8 +16,9 @@
import torch
from omegaconf import OmegaConf

from nemo.collections.asr.parts.numba import __NUMBA_MINIMUM_VERSION__, numba_utils
from nemo.collections.asr.parts.numba.spec_augment import spec_aug_numba
from nemo.core.utils import numba_utils
from nemo.core.utils.numba_utils import __NUMBA_MINIMUM_VERSION__


def get_cfg(seed=0, dtype='float32'):
(Diffs for the remaining changed files not shown.)