diff --git a/examples/README.md b/examples/README.md index 2cfe8d52ecc..7342bbe1e8a 100644 --- a/examples/README.md +++ b/examples/README.md @@ -461,7 +461,7 @@ Intel® Neural Compressor validated examples with multiple compression technique RNN-T Speech Recognition Post-Training Dynamic / Static Quantization - eager / ipex + eager Wav2Vec2 diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/QSL.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/QSL.py deleted file mode 100644 index 14ce2478100..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/QSL.py +++ /dev/null @@ -1,71 +0,0 @@ -import sys -import os -sys.path.insert(0, os.path.join(os.getcwd(), "pytorch")) - -from parts.manifest import Manifest -from parts.segment import AudioSegment - -import numpy as np - -import mlperf_loadgen as lg - - -class AudioQSL: - def __init__(self, dataset_dir, manifest_filepath, labels, - sample_rate=16000, perf_count=None): - m_paths = [manifest_filepath] - self.manifest = Manifest(dataset_dir, m_paths, labels, len(labels), - normalize=True, max_duration=15.0) - self.sample_rate = sample_rate - self.count = len(self.manifest) - perf_count = self.count if perf_count is None else perf_count - self.sample_id_to_sample = {} - self.qsl = lg.ConstructQSL(self.count, perf_count, - self.load_query_samples, - self.unload_query_samples) - print( - "Dataset loaded with {0:.2f} hours. Filtered {1:.2f} hours. Number of samples: {2}".format( - self.manifest.duration / 3600, - self.manifest.filtered_duration / 3600, - self.count)) - - def load_query_samples(self, sample_list): - for sample_id in sample_list: - self.sample_id_to_sample[sample_id] = self._load_sample(sample_id) - - def unload_query_samples(self, sample_list): - for sample_id in sample_list: - del self.sample_id_to_sample[sample_id] - - def idxs(self): - return self.sample_id_to_sample.keys() - - def _load_sample(self, index): - sample = self.manifest[index] - segment = AudioSegment.from_file(sample['audio_filepath'][0], - target_sr=self.sample_rate) - waveform = segment.samples - assert isinstance(waveform, np.ndarray) and waveform.dtype == np.float32 - return waveform - - def __getitem__(self, index): - return self.sample_id_to_sample[index] - - def __del__(self): - lg.DestroyQSL(self.qsl) - print("Finished destroying QSL.") - -# We have no problem fitting all data in memory, so we do that, in -# order to speed up execution of the benchmark. -class AudioQSLInMemory(AudioQSL): - def __init__(self, dataset_dir, manifest_filepath, labels, - sample_rate=16000, perf_count=None): - super().__init__(dataset_dir, manifest_filepath, labels, - sample_rate, perf_count) - super().load_query_samples(range(self.count)) - - def load_query_samples(self, sample_list): - pass - - def unload_query_samples(self, sample_list): - pass diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/README.md b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/README.md deleted file mode 100644 index fe734f8266d..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/README.md +++ /dev/null @@ -1,105 +0,0 @@ -# RNNT MLPerf Inference v1.1 - -> Note: not support IPEX 1.10, 1.11 - -## SW requirements -### -| SW |configuration | -|--|--| -| GCC | GCC 9.3 | - -## Steps to run RNNT - -### 1. 
Install Anaconda 3
-```
-  wget https://repo.continuum.io/archive/Anaconda3-5.0.0-Linux-x86_64.sh -O anaconda3.sh
-  chmod +x anaconda3.sh
-  ~/anaconda3.sh -b -p ~/anaconda3
-  ~/anaconda3/bin/conda create -n rnnt python=3.7
-
-  export PATH=~/anaconda3/bin:$PATH
-  source ~/anaconda3/bin/activate rnnt
-```
-### 2. Prepare code and environment
-```
-  cd examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex
-  bash prepare_env.sh
-```
-
-### 3. Install IPEX
-Refer to [intel/intel-extension-for-pytorch at mlperf/inference-1.1 (github.com)](https://github.com/intel/intel-extension-for-pytorch/tree/mlperf/inference-1.1).
-
-1. Install PyTorch 1.8 and TorchVision 0.9
-
-   Refer to [PyTorch install](https://pytorch.org/get-started/locally/)
-   ```shell
-   pip3 install torch==1.8.0+cpu torchvision==0.9.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
-   ```
-2. Get the Intel Extension for PyTorch source and install it
-   > **Note**
-   >
-   > The GCC 9 compiler is recommended
-   >
-
-   ```shell
-   git clone https://github.com/intel/intel-extension-for-pytorch
-   cd intel-extension-for-pytorch
-   git checkout mlperf/inference-1.1
-   git submodule sync
-   git submodule update --init --recursive
-   pip install lark-parser hypothesis
-
-   python setup.py install
-   ```
-
-### 4. Prepare the model and dataset
-```
-  work_dir=mlperf-rnnt-librispeech
-  local_data_dir=$work_dir/local_data
-  mkdir -p $local_data_dir
-  librispeech_download_dir=.
-  # prepare model
-  wget https://zenodo.org/record/3662521/files/DistributedDataParallel_1576581068.9962234-epoch-100.pt?download=1 -O $work_dir/rnnt.pt
-
-  # prepare inference dataset
-  wget https://www.openslr.org/resources/12/dev-clean.tar.gz
-  # we suggest checking run.sh to locate the dataset
-  python pytorch/utils/download_librispeech.py \
-     pytorch/utils/librispeech-inference.csv \
-     $librispeech_download_dir \
-     -e $local_data_dir --skip_download
-  python pytorch/utils/convert_librispeech.py \
-     --input_dir $local_data_dir/LibriSpeech/dev-clean \
-     --dest_dir $local_data_dir/dev-clean-wav \
-     --output_json $local_data_dir/dev-clean-wav.json
-```
-
-### 5. Tune RNN-T with Neural Compressor
-  Please update setup_env_offline.sh or setup_env_server.sh and user.conf according to your platform resources.
-```
-  # offline scenario
-  ./run_tuning.sh --dataset_location=$local_data_dir --input_model=$work_dir/rnnt.pt
-  # server scenario
-  ./run_tuning.sh --dataset_location=$local_data_dir --input_model=$work_dir/rnnt.pt --server
-```
-
-### 6. Benchmark
-```
-# fp32 benchmark
-bash ./run_benchmark.sh --dataset_location=/path/to/RNN-T/dataset/LibriSpeech --input_model=rnnt.pt --mode=benchmark
-# int8+bf16 benchmark
-bash ./run_benchmark.sh --dataset_location=/path/to/RNN-T/dataset/LibriSpeech --input_model=rnnt.pt --mode=benchmark --int8=true
-# fp32 accuracy
-bash ./run_benchmark.sh --dataset_location=/path/to/RNN-T/dataset/LibriSpeech --input_model=rnnt.pt --mode=accuracy
-# int8+bf16 accuracy
-bash ./run_benchmark.sh --dataset_location=/path/to/RNN-T/dataset/LibriSpeech --input_model=rnnt.pt --mode=accuracy --int8=true
-```
-
-### Note on Server scenario
-
-* Only the encoder is quantized (int8); the decoder runs with bf16 ops.
-* For the server scenario, we exploit the fact that incoming data have different sequence lengths (and inference times) by bucketing according to sequence length
-and specifying batch size for each bucket such that latency can be satisfied (a rough sketch of the idea follows below). 
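As a rough illustration of this bucketing idea, the sketch below groups incoming samples by waveform length and emits a batch whenever a bucket fills up. The cutoffs and batch sizes are invented for the sketch and are not taken from this example's machine.conf.

```python
# Hypothetical length-based bucketing for the Server scenario.
# Cutoffs (in audio samples) and batch sizes are made-up illustration values;
# the real ones come from machine.conf (waveform_len_cutoff / batch_size).
from collections import defaultdict

BUCKETS = [(80_000, 16), (160_000, 8), (320_000, 4)]  # (cutoff, batch size)

def bucket_for(waveform_len: int) -> int:
    """Return the index of the first bucket whose cutoff fits the sample."""
    for i, (cutoff, _) in enumerate(BUCKETS):
        if waveform_len <= cutoff:
            return i
    return len(BUCKETS) - 1  # longest bucket catches everything else

def dispatch(samples):
    """Group (sample_id, waveform) pairs into per-bucket batches."""
    pending = defaultdict(list)
    batches = []
    for sample_id, waveform in samples:
        b = bucket_for(len(waveform))
        pending[b].append(sample_id)
        if len(pending[b]) == BUCKETS[b][1]:   # bucket's batch size reached
            batches.append((b, pending.pop(b)))
    # flush partially filled buckets at the end of the run
    batches.extend((b, ids) for b, ids in pending.items())
    return batches
```

Short samples then run with larger batches and long samples with smaller ones, which is how the per-bucket batch sizes keep tail latency under the Server target.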
The settings are specified in machine.conf file and required fields -are cores_per_instance, num_instances, waveform_len_cutoff, batch_size. - diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/accuracy_eval.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/accuracy_eval.py deleted file mode 100644 index ea81792855b..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/accuracy_eval.py +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env python - -import argparse -import array -import json -import sys -import os - -sys.path.insert(0, os.path.join(os.path.dirname(__file__), "pytorch")) - -from QSL import AudioQSL -from helpers import process_evaluation_epoch, __gather_predictions -from parts.manifest import Manifest - -dtype_map = { - "int8": 'b', - "int16": 'h', - "int32": 'l', - "int64": 'q', -} - -def get_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--log_dir", required=True) - parser.add_argument("--dataset_dir", required=True) - parser.add_argument("--manifest", required=True) - parser.add_argument("--output_dtype", default="int64", choices=dtype_map.keys(), help="Output data type") - args = parser.parse_args() - return args - -def main(): - args = get_args() - labels = [" ", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "'"] - qsl = AudioQSL(args.dataset_dir, args.manifest, labels) - manifest = qsl.manifest - with open(os.path.join(args.log_dir, "mlperf_log_accuracy.json")) as fh: - results = json.load(fh) - hypotheses = [] - references = [] - for result in results: - hypotheses.append(array.array(dtype_map[args.output_dtype], bytes.fromhex(result["data"])).tolist()) - references.append(manifest[result["qsl_idx"]]["transcript"]) - - references = __gather_predictions([references], labels=labels) - hypotheses = __gather_predictions([hypotheses], labels=labels) - - d = dict(predictions=hypotheses, - transcripts=references) - wer = process_evaluation_epoch(d) - print("Word Error Rate: {:}%, accuracy={:}%".format(wer * 100, (1 - wer) * 100)) - -if __name__ == '__main__': - main() diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/conf.yaml b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/conf.yaml deleted file mode 100644 index 56c9f2e1245..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/conf.yaml +++ /dev/null @@ -1,12 +0,0 @@ -version: 1.0 - -model: # mandatory. used to specify model specific information. - name: rnnt - framework: pytorch_ipex # mandatory. supported values are tensorflow, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension. - -tuning: - accuracy_criterion: - relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%. - exit_policy: - timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit. - random_seed: 9527 # optional. random seed for deterministic tuning. 
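The conf.yaml above feeds the YAML-driven Neural Compressor 1.x API that run_tuning.sh ultimately calls. The actual driver script is not part of this diff, so the following is only a sketch of that flow under stated assumptions: `rnnt_model`, `calib_dataloader`, and `eval_wer` are placeholders for this example's model, LibriSpeech dataloader, and WER-based metric, and depending on the Neural Compressor release the tuning entry point is `quantizer.fit()` or calling `quantizer()` directly.

```python
# Minimal sketch of the YAML-driven Neural Compressor 1.x quantization flow.
# rnnt_model, calib_dataloader, and eval_wer are placeholders, not code from
# this example.
from neural_compressor.experimental import Quantization, common

quantizer = Quantization("conf.yaml")            # framework: pytorch_ipex,
                                                 # relative accuracy loss <= 1%
quantizer.model = common.Model(rnnt_model)       # FP32 RNN-T to be quantized
quantizer.calib_dataloader = calib_dataloader    # calibration samples
quantizer.eval_func = eval_wer                   # returns accuracy = 1 - WER
q_model = quantizer.fit()                        # tuning loop honoring exit_policy
q_model.save("./nc_workspace")
```

The `timeout: 0` exit policy means tuning stops at the first configuration that meets the 1% relative accuracy criterion rather than running for a fixed time budget.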
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/configure_lstm_only_encoder.json b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/configure_lstm_only_encoder.json deleted file mode 100644 index 1dc6dae0a85..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/configure_lstm_only_encoder.json +++ /dev/null @@ -1,52 +0,0 @@ -[ - { - "id": 0, - "name": "lstm", - "algorithm": "min_max", - "weight_granularity": "per_tensor", - "inputs_scale": [ - 16.95465660095215 - ], - "outputs_scale": [ - 16.95465660095215 - ], - "inputs_zero_point": [ - 80 - ], - "outputs_zero_point": [ - 80 - ], - "inputs_uint8_used": [ - true - ], - "outputs_uint8_used": [ - true - ], - "quantized": true - }, - { - "id": 1, - "name": "lstm", - "algorithm": "min_max", - "weight_granularity": "per_tensor", - "inputs_scale": [ - 128.29344177246094 - ], - "outputs_scale": [ - 128.29344177246094 - ], - "inputs_zero_point": [ - 126 - ], - "outputs_zero_point": [ - 126 - ], - "inputs_uint8_used": [ - true - ], - "outputs_uint8_used": [ - true - ], - "quantized": true - } -] \ No newline at end of file diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/environment.yml b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/environment.yml deleted file mode 100644 index dfadf861c39..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/environment.yml +++ /dev/null @@ -1,128 +0,0 @@ -name: mlperf-rnnt -channels: - - pytorch - - conda-forge - - defaults -dependencies: - - _libgcc_mutex=0.1=main - - absl-py=0.9.0=py36_0 - - blas=1.0=mkl - - bzip2=1.0.8=h7b6447c_0 - - ca-certificates=2020.4.5.1=hecc5488_0 - - certifi=2020.4.5.1=py36h9f0ad1d_0 - - cffi=1.14.0=py36h2e261b9_0 - - cmake=3.14.0=h52cb24c_0 - - cudatoolkit=10.1.243=h6bb024c_0 - - cudatoolkit-dev=10.1.243=h516909a_3 - - expat=2.2.6=he6710b0_0 - - freetype=2.9.1=h8a8886c_1 - - gdb=8.3.1=py36h497da48_1 - - intel-openmp=2020.0=166 - - jpeg=9b=h024ee3a_2 - - krb5=1.17.1=h173b8e3_0 - - lame=3.100=h14c3975_1001 - - ld_impl_linux-64=2.33.1=h53a641e_7 - - libcurl=7.69.1=h20c2e04_0 - - libedit=3.1.20181209=hc058e9b_0 - - libffi=3.2.1=hd88cf55_4 - - libgcc-ng=9.1.0=hdf63c60_0 - - libgfortran-ng=7.3.0=hdf63c60_0 - - libpng=1.6.37=hbc83047_0 - - libssh2=1.9.0=h1ba5d50_1 - - libstdcxx-ng=9.1.0=hdf63c60_0 - - libtiff=4.1.0=h2733197_0 - - mad=0.15.1b=he1b5a44_0 - - mkl=2020.0=166 - - mkl-include=2020.0=166 - - mkl-service=2.3.0=py36he904b0f_0 - - mkl_fft=1.0.15=py36ha843d7b_0 - - mkl_random=1.1.0=py36hd6b4f25_0 - - ncurses=6.1=hf484d3e_1002 - - ninja=1.9.0=py36hfd86e86_0 - - numpy=1.18.1=py36h4f9e942_0 - - numpy-base=1.18.1=py36hde5b4d6_1 - - olefile=0.46=py_0 - - openssl=1.1.1g=h516909a_0 - - pillow=7.0.0=py36hb39fc2d_0 - - pip=20.0.2=py36_1 - - pycparser=2.20=py_0 - - python=3.6.10=h0371630_0 - - python_abi=3.6=1_cp36m - - pytorch=1.5.0=py3.6_cuda10.1.243_cudnn7.6.3_0 - - pyyaml=5.3.1=py36h7b6447c_0 - - readline=7.0=hf8c457e_1001 - - rhash=1.3.8=h1ba5d50_0 - - setuptools=46.1.3=py36_0 - - six=1.14.0=py36_0 - - sqlite=3.31.1=h7b6447c_0 - - tk=8.6.8=hbc83047_0 - - torchvision=0.6.0=py36_cu101 - - wheel=0.34.2=py36_0 - - xz=5.2.4=h14c3975_4 - - yaml=0.1.7=had09818_2 - - zlib=1.2.11=h7b6447c_3 - - zstd=1.3.7=h0b5b093_0 - - pip: - - ascii-graph==1.5.1 - - attrs==19.3.0 - - audioread==2.1.8 - - autopep8==1.5.1 - - backcall==0.1.0 - - chardet==3.0.4 - - coverage==5.0.4 - - decorator==4.4.2 - - entrypoints==0.3 - - 
flake8==3.7.9 - - grpcio==1.28.1 - - idna==2.9 - - importlib-metadata==1.6.0 - - inflect==4.1.0 - - ipdb==0.13.2 - - ipython==7.13.0 - - ipython-genutils==0.2.0 - - jedi==0.16.0 - - joblib==0.14.1 - - librosa==0.7.2 - - llvmlite==0.31.0 - - markdown==3.2.1 - - mccabe==0.6.1 - - more-itertools==8.2.0 - - numba==0.48.0 - - onnx==1.6.0 - - onnxruntime==1.2.0 - - packaging==20.3 - - pandas==0.24.2 - - parso==0.6.2 - - pexpect==4.8.0 - - pickleshare==0.7.5 - - pluggy==0.13.1 - - prompt-toolkit==3.0.5 - - protobuf==3.11.3 - - ptyprocess==0.6.0 - - py==1.8.1 - - pycodestyle==2.5.0 - - pyflakes==2.1.1 - - pygments==2.6.1 - - pyparsing==2.4.7 - - pytest==5.4.2 - - python-dateutil==2.8.1 - - pytz==2019.3 - - requests==2.23.0 - - resampy==0.2.2 - - scikit-learn==0.22.2.post1 - - scipy==1.4.1 - - soundfile==0.10.3.post1 - - sox==1.3.7 - - tensorboard==2.0.0 - - toml==0.10.0 - - tqdm==4.31.1 - - traitlets==4.3.3 - - typing-extensions==3.7.4.2 - - text-unidecode==1.3 - - urllib3==1.25.8 - - wcwidth==0.1.9 - - werkzeug==1.0.1 - - wrapt==1.10.11 - - zipp==3.1.0 -prefix: /cb/home/daniel/ws/miniconda3/envs/mlperf-rnnt - diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/mlperf.conf-old b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/mlperf.conf-old deleted file mode 100644 index 9546d41cf18..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/mlperf.conf-old +++ /dev/null @@ -1,68 +0,0 @@ -# The format of this config file is 'key = value'. -# The key has the format 'model.scenario.key'. Value is mostly int64_t. -# Model maybe '*' as wildcard. In that case the value applies to all models. -# All times are in milli seconds - -# Set performance_sample_count for each model. -# User can optionally set this to higher values in user.conf. -mobilenet.*.performance_sample_count_override = 1024 -gnmt.*.performance_sample_count_override = 3903900 -resnet50.*.performance_sample_count_override = 1024 -ssd-mobilenet.*.performance_sample_count_override = 256 -ssd-resnet34.*.performance_sample_count_override = 64 -bert.*.performance_sample_count_override = 10833 -dlrm.*.performance_sample_count_override = 204800 -rnnt.*.performance_sample_count_override = 2513 -3d-unet.*.performance_sample_count_override = 16 - -# Set seeds. The seeds will be distributed two weeks before the submission. 
-# 0x168ad48ada698a73 -*.*.qsl_rng_seed = 1624344308455410291 -# 0x07303fed113b8976 -*.*.sample_index_rng_seed = 517984244576520566 -# 0x8b7e1740dacb67f9 -*.*.schedule_rng_seed = 10051496985653635065 - -*.SingleStream.target_latency_percentile = 90 -*.SingleStream.min_duration = 600000 -*.SingleStream.min_query_count = 1024 - -*.MultiStream.target_qps = 20 -*.MultiStream.target_latency_percentile = 99 -*.MultiStream.max_async_queries = 1 -*.MultiStream.target_latency = 50 -*.MultiStream.min_duration = 600000 -*.MultiStream.min_query_count = 270336 -ssd-resnet34.MultiStream.target_qps = 15 -ssd-resnet34.MultiStream.target_latency = 66 -gnmt.MultiStream.min_query_count = 90112 -gnmt.MultiStream.target_latency = 100 -gnmt.MultiStream.target_qps = 10 -gnmt.MultiStream.target_latency_percentile = 97 - -*.Server.target_latency = 10 -*.Server.target_latency_percentile = 99 -*.Server.target_duration = 0 -*.Server.min_duration = 600000 -*.Server.min_query_count = 270336 -resnet50.Server.target_latency = 15 -ssd-resnet34.Server.target_latency = 100 -gnmt.Server.min_query_count = 90112 -gnmt.Server.target_latency = 250 -gnmt.Server.target_latency_percentile = 97 -bert.Server.target_latency = 130 -dlrm.Server.target_latency = 30 -rnnt.Server.target_latency = 1000 - -*.Offline.target_latency_percentile = 90 -*.Offline.min_duration = 600000 -# In Offline scenario, we always have one query. But LoadGen maps this to -# min_sample_count internally in Offline scenario, so set this to 24576 since -# the rule requires that Offline scenario run for at least 24576 samples. -*.Offline.min_query_count = 24576 - -# These fields should be defined and overridden by user.conf. -*.SingleStream.target_latency = 10 -*.Server.target_qps = 1.0 -*.Offline.target_qps = 1.0 -*.MultiStream.samples_per_query = 4 \ No newline at end of file diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/optional_harness_ck/README.md b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/optional_harness_ck/README.md deleted file mode 100644 index 896cdf71635..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/optional_harness_ck/README.md +++ /dev/null @@ -1,303 +0,0 @@ -# MLPerf Inference - Speech Recognition - RNN-T - -We describe an automated and reproducible workflow for the [RNN-T -workload](https://github.com/mlperf/inference/tree/master/v0.7/speech_recognition/rnnt) -implemented using the [Collective Knowledge](http://cknowledge.org) technology. It automatically -downloads the model and the dataset, preprocesses the dataset, builds the LoadGen API, etc. -For any questions or questions, please email info@dividiti.com or simply [open an issue](https://github.com/mlperf/inference/issues) on GitHub. - -**NB:** Below we give an _essential_ sequence of steps that should result in a successful setup -of the RNN-T workflow on a minimally configured Linux system. - -The steps are extracted from a [minimalistic Amazon Linux -2](https://github.com/ctuning/ck-mlperf/blob/master/docker/speech-recognition.rnnt/Dockerfile.amazonlinux.min) -Docker image, which is derived from a more verbose [Amazon Linux -2](https://github.com/ctuning/ck-mlperf/blob/master/docker/speech-recognition.rnnt/Dockerfile.amazonlinux) -Docker image by omitting steps that the [Collective Knowledge -framework](https://github.com/ctuning/ck) performs automatically. 
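Relating back to the LoadGen configuration files above: the 'model.scenario.key' entries in mlperf.conf (and the user.conf overrides it mentions) are consumed through LoadGen's TestSettings. The sketch below uses the same mlperf_loadgen Python bindings imported in QSL.py; the file names, the "rnnt" model key, and the Offline scenario are assumptions, and this example's harness may wire the settings up differently.

```python
# Sketch of how 'model.scenario.key' entries from mlperf.conf / user.conf
# reach LoadGen. File names and the Offline scenario are assumptions.
import mlperf_loadgen as lg

settings = lg.TestSettings()
settings.scenario = lg.TestScenario.Offline
settings.mode = lg.TestMode.PerformanceOnly

# Baseline rules first, then user overrides (target_qps, target_latency, ...).
settings.FromConfig("mlperf.conf", "rnnt", "Offline")
settings.FromConfig("user.conf", "rnnt", "Offline")

# The SUT and QSL come from pytorch_SUT.py / QSL.py; with them constructed,
# the run is started with lg.StartTest(sut.sut, qsl.qsl, settings).
```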
- -For example, installing the preprocessed dataset is explicit in the verbose image: -``` -#-----------------------------------------------------------------------------# -# Step 3. Download the official MLPerf Inference RNNT dataset (LibriSpeech -# dev-clean) and preprocess it to wav. -#-----------------------------------------------------------------------------# -RUN ck install package --tags=dataset,speech-recognition,dev-clean,original -# NB: Can ignore the lzma related warning. -RUN ck install package --tags=dataset,speech-recognition,dev-clean,preprocessed -#-----------------------------------------------------------------------------# -``` -but is implicit in the minimalistic image: -``` -#- #-----------------------------------------------------------------------------# -#- # Step 3. Download the official MLPerf Inference RNNT dataset (LibriSpeech -#- # dev-clean) and preprocess it to wav. -#- #-----------------------------------------------------------------------------# -#- RUN ck install package --tags=dataset,speech-recognition,dev-clean,original -#- # NB: Can ignore the lzma related warning. -#- RUN ck install package --tags=dataset,speech-recognition,dev-clean,preprocessed -#- #-----------------------------------------------------------------------------# -``` -because it's going to be triggered by a test performance run: -``` -#+ #-----------------------------------------------------------------------------# -#+ # Step 6. Pull all the implicit dependencies commented out in Steps 1-5. -#+ #-----------------------------------------------------------------------------# -RUN ck run program:speech-recognition-pytorch-loadgen --cmd_key=performance --skip_print_timers -#+ #-----------------------------------------------------------------------------# -``` -(Omitted steps are commented out with `#- `. Added steps are commented with `#+ `.) - -For other possible variations and workarounds see the [complete -collection](https://github.com/ctuning/ck-mlperf/blob/master/docker/speech-recognition.rnnt/README.md) -of Docker images for this workflow including Ubuntu, Debian and CentOS. - -# Table of Contents - -1. [Installation](#install) - 1. Install [system-wide prerequisites](#install_system) - 1. [Ubuntu 20.04 or similar](#install_system_ubuntu) - 1. [CentOS 7 or similar](#install_system_centos_7) - 1. [CentOS 8 or similar](#install_system_centos_8) - 1. Install [Collective Knowledge](#install_ck) (CK) and its repositories - 1. Detect [GCC](#detect_gcc) - 1. Detect [Python](#detect_python) - 1. Install [Python dependencies](#install_python_deps) - 1. Install a branch of the [MLPerf Inference](#install_inference_repo) repo -1. [Usage](#usage) - 1. [Performance](#usage_performance) - 1. [Accuracy](#usage_performance) - - -## Installation - - -### Install system-wide prerequisites - -**NB:** Run the below commands for your Linux system with `sudo` or as superuser. 
- - -#### Ubuntu 20.04 or similar -```bash -$ sudo apt update -y -$ sudo apt install -y apt-utils -$ sudo apt upgrade -y -$ sudo apt install -y\ - python3 python3-pip\ - gcc g++\ - make patch vim\ - git wget zip libz-dev\ - libsndfile1-dev -$ sudo apt clean -``` - - -#### CentOS 7 or similar -```bash -$ sudo yum upgrade -y -$ sudo yum install -y\ - python3 python3-pip python3-devel\ - gcc gcc-c++\ - make which patch vim\ - git wget zip unzip\ - tar xz\ - libsndfile-devel -$ sudo yum clean all -``` - - -#### CentOS 8 or similar -```bash -$ sudo yum upgrade -y -$ sudo yum install -y\ - gcc gcc-c++\ - make which patch vim\ - git wget zip unzip\ - openssl-devel bzip2-devel libffi-devel\ -$ sudo yum clean all -$ sudo dnf install -y python3 python3-pip python3-devel -$ sudo dnf --enablerepo=PowerTools install -y libsndfile-devel -``` - - - -### Install [Collective Knowledge](http://cknowledge.org/) (CK) and its repositories - -```bash -$ export CK_PYTHON=/usr/bin/python3 -$ $CK_PYTHON -m pip install --ignore-installed pip setuptools --user -$ $CK_PYTHON -m pip install ck -$ ck version -V1.15.0 -$ ck pull repo:ck-mlperf -$ ck pull repo:ck-pytorch -``` - - -### Detect (system) GCC -``` -$ export CK_CC=/usr/bin/gcc -$ ck detect soft:compiler.gcc --full_path=$CK_CC -$ ck show env --tags=compiler,gcc -Env UID: Target OS: Bits: Name: Version: Tags: - -b8bd7b49f72f9794 linux-64 64 GNU C compiler 7.3.1 64bits,compiler,gcc,host-os-linux-64,lang-c,lang-cpp,target-os-linux-64,v7,v7.3,v7.3.1 -``` -**NB:** Required to build the FLAC and SoX dependencies of preprocessing. CK can normally detect compilers automatically, but we are playing safe here. - - -### Detect (system) Python -``` -$ export CK_PYTHON=/usr/bin/python3 -$ ck detect soft:compiler.python --full_path=$CK_PYTHON -$ ck show env --tags=compiler,python -Env UID: Target OS: Bits: Name: Version: Tags: - -633a6b22205eb07f linux-64 64 python 3.7.6 64bits,compiler,host-os-linux-64,lang-python,python,target-os-linux-64,v3,v3.7,v3.7.6 -``` -**NB:** CK can normally detect available Python interpreters automatically, but we are playing safe here. - - -### Install Python dependencies (in userspace) - -#### Install implicit dependencies via pip -```bash -$ export CK_PYTHON=/usr/bin/python3 -$ $CK_PYTHON -m pip install --user --upgrade \ - tqdm wheel toml unidecode inflect sndfile librosa numba==0.48 -... -Successfully installed inflect-4.1.0 librosa-0.7.2 llvmlite-0.31.0 numba-0.48.0 sndfile-0.2.0 unidecode-1.1.1 wheel-0.34.2 -``` -**NB:** These dependencies are _implicit_, i.e. CK will not try to satisfy them. If they are not installed, however, the workflow will fail. - - -#### Install explicit dependencies via CK (also via `pip`, but register with CK at the same time) -```bash -$ ck install package --tags=python-package,torch -$ ck install package --tags=python-package,pandas -$ ck install package --tags=python-package,sox -$ ck install package --tags=python-package,absl -``` -**NB:** These dependencies are _explicit_, i.e. CK will try to satisfy them automatically. On a machine with multiple versions of Python, things can get messy, so we are playing safe here. - - -### Install an MLPerf Inference [branch](https://github.com/dividiti/inference/tree/dvdt-rnnt) with [dividiti](http://dividiti.com)'s tweaks for RNN-T -```bash -$ ck install package --tags=mlperf,inference,source,dividiti.rnnt -``` -**NB:** This source will be used for building LoadGen as well. 
- - - -## Usage - - -### Running a performance test - -The first run will end up resolving all the remaining explicit dependencies: -- preprocessing the LibriSpeech Dev-Clean dataset to wav; -- building the LoadGen API; -- downloading the PyTorch model. - -It's a performance run which should print something like: -``` -$ ck run program:speech-recognition-pytorch-loadgen --cmd_key=performance --skip_print_timers -... -Dataset loaded with 4.36 hours. Filtered 1.02 hours. Number of samples: 2513 -Running Loadgen test... -Average latency (ms) per query: -7335.167247106061 -Median latency (ms): -7391.662108 -90 percentile latency (ms): -13347.925176 -================================================ -MLPerf Results Summary -================================================ -SUT name : PySUT -Scenario : Offline -Mode : Performance -Samples per second: 4.63626 -Result is : INVALID - Min duration satisfied : NO - Min queries satisfied : Yes -Recommendations: - * Increase expected QPS so the loadgen pre-generates a larger (coalesced) query. - -================================================ -Additional Stats -================================================ -Min latency (ns) : 278432559 -Max latency (ns) : 14235613054 -Mean latency (ns) : 7335167247 -50.00 percentile latency (ns) : 7521181269 -90.00 percentile latency (ns) : 13402430910 -95.00 percentile latency (ns) : 13723706550 -97.00 percentile latency (ns) : 14054764438 -99.00 percentile latency (ns) : 14235613054 -99.90 percentile latency (ns) : 14235613054 - -================================================ -Test Parameters Used -================================================ -samples_per_query : 66 -target_qps : 1 -target_latency (ns): 0 -max_async_queries : 1 -min_duration (ms): 60000 -max_duration (ms): 0 -min_query_count : 1 -max_query_count : 0 -qsl_rng_seed : 3133965575612453542 -sample_index_rng_seed : 665484352860916858 -schedule_rng_seed : 3622009729038561421 -accuracy_log_rng_seed : 0 -accuracy_log_probability : 0 -print_timestamps : false -performance_issue_unique : false -performance_issue_same : false -performance_issue_same_index : 0 -performance_sample_count : 2513 - -No warnings encountered during test. - -No errors encountered during test. -Done! - -Execution time: 38.735 sec. -``` - -The above output is the contents of `mlperf_log_summary.txt`, one of the log files generated by LoadGen. All LoadGen log files can be located in the program's temporary directory: -```bash -$ cd `ck find program:speech-recognition-pytorch-loadgen`/tmp && ls -la mlperf_log_* --rw-r--r-- 1 anton eng 4 Jul 3 18:06 mlperf_log_accuracy.json --rw-r--r-- 1 anton eng 20289 Jul 3 18:06 mlperf_log_detail.txt --rw-r--r-- 1 anton eng 1603 Jul 3 18:06 mlperf_log_summary.txt --rw-r--r-- 1 anton eng 860442 Jul 3 18:06 mlperf_log_trace.json -``` - - -### Running an accuracy test - -``` -$ ck run program:speech-recognition-pytorch-loadgen --cmd_key=accuracy --skip_print_timers -... -Dataset loaded with 4.36 hours. Filtered 1.02 hours. Number of samples: 2513 -Running Loadgen test... - -No warnings encountered during test. - -No errors encountered during test. 
-Running accuracy script: /usr/bin/python3 /disk1/homes/anton/CK-TOOLS/mlperf-inference-dividiti.rnnt/inference/v0.7/speech_recognition/rnnt/accuracy_eval.py --log_dir /disk1/homes/anton/CK/ck-mlperf/program/speech-recognition-pytorch-loadgen/tmp --dataset_dir /homes/anton/CK-TOOLS/dataset-librispeech-preprocessed-to-wav-dev-clean/../ --manifest /homes/anton/CK-TOOLS/dataset-librispeech-preprocessed-to-wav-dev-clean/wav-list.json -Dataset loaded with 4.36 hours. Filtered 1.02 hours. Number of samples: 2513 -Word Error Rate: 0.07452253714852645 -Done! - -Execution time: 502.197 sec. - -$ cd `ck find program:speech-recognition-pytorch-loadgen`/tmp && ls -la mlperf_log_* --rw-r--r-- 1 anton eng 3862427 Jul 3 18:00 mlperf_log_accuracy.json --rw-r--r-- 1 anton eng 20126 Jul 3 18:00 mlperf_log_detail.txt --rw-r--r-- 1 anton eng 74 Jul 3 18:00 mlperf_log_summary.txt --rw-r--r-- 1 anton eng 29738248 Jul 3 18:00 mlperf_log_trace.json -``` diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/prepare_env.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/prepare_env.sh deleted file mode 100644 index 4929243a6fe..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/prepare_env.sh +++ /dev/null @@ -1,67 +0,0 @@ - #set -eo pipefail - set -x - - WORKDIR=`pwd` - - PATTERN='[-a-zA-Z0-9_]*=' - if [ $# -lt "0" ] ; then - echo 'ERROR:' - printf 'Please use following parameters: - --code= - ' - exit 1 - fi - - for i in "$@" - do - case $i in - --code=*) - code=`echo $i | sed "s/${PATTERN}//"`;; - *) - echo "Parameter $i not recognized."; exit 1;; - esac - done - - if [ -d $code ];then - REPODIR=$code - fi - - echo "Install dependencies" - pip install sklearn onnx tqdm lark-parser - pip install -e git+https://github.com/mlperf/logging@0.7.0-rc2#egg=mlperf-logging - conda install ninja pyyaml setuptools cmake cffi typing --yes - conda install numpy=1.21.5 --yes - conda install intel-openmp mkl mkl-include --no-update-deps --yes - conda install -c conda-forge gperftools --yes - conda install jemalloc=5.0.1 --yes - pip install opencv-python absl-py opencv-python-headless intel-openmp - - echo "Install libraries" - mkdir $WORKDIR/local - export install_dir=$WORKDIR/local - cd $WORKDIR && mkdir third_party - wget https://ftp.osuosl.org/pub/xiph/releases/flac/flac-1.3.2.tar.xz -O third_party/flac-1.3.2.tar.xz - cd third_party && tar xf flac-1.3.2.tar.xz && cd flac-1.3.2 - ./configure --prefix=$install_dir && make && make install - - cd $WORKDIR - wget https://sourceforge.net/projects/sox/files/sox/14.4.2/sox-14.4.2.tar.gz -O third_party/sox-14.4.2.tar.gz - cd third_party && tar zxf sox-14.4.2.tar.gz && cd sox-14.4.2 - LDFLAGS="-L${install_dir}/lib" CFLAGS="-I${install_dir}/include" ./configure --prefix=$install_dir --with-flac && make && make install - - cd $WORKDIR - wget http://www.mega-nerd.com/libsndfile/files/libsndfile-1.0.28.tar.gz -O third_party/libsndfile-1.0.28.tar.gz - cd third_party && tar zxf libsndfile-1.0.28.tar.gz && cd libsndfile-1.0.28 - ./configure --prefix=$install_dir && make && make install - - echo "Install pytorch/ipex" - export LD_LIBRARY_PATH=$WORKDIR/local/lib:$LD_LIBRARY_PATH - - cd $WORKDIR - bash prepare_loadgen.sh ${WORKDIR} - - echo "Install dependencies for pytorch_SUT.py" - pip install toml text-unidecode inflect - pip install librosa==0.8.1 - - set +x diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/prepare_loadgen.sh 
b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/prepare_loadgen.sh deleted file mode 100644 index 5ca666b39c6..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/prepare_loadgen.sh +++ /dev/null @@ -1,25 +0,0 @@ -WORKDIR=$1 -pushd . -cd $WORKDIR -echo Current directory is $PWD -echo Using gcc=`which gcc` -echo GCC version should >= 9 -gcc --version -CC=`which gcc` - -# install pytorch -echo "Install pytorch/ipex" -export LD_LIBRARY_PATH=$WORKDIR/local/lib:$LD_LIBRARY_PATH -CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname $(which conda))/../"} - -cd $WORKDIR -echo "Install loadgen" -git clone --recurse-submodules https://github.com/mlcommons/inference.git mlperf_inference -cd mlperf_inference -git checkout r1.1 -git log -1 -git submodule update --init --recursive -cd loadgen -CFLAGS="-std=c++14" python setup.py install - -popd diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/Dockerfile b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/Dockerfile deleted file mode 100644 index 1cb52bf6261..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/Dockerfile +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -ARG FROM_IMAGE_NAME=nvcr.io/nvidia/pytorch:19.09-py3 -FROM ${FROM_IMAGE_NAME} - - -RUN apt-get update && apt-get install -y libsndfile1 && apt-get install -y sox && rm -rf /var/lib/apt/lists/* - -RUN COMMIT_SHA=c6d12f9e1562833c2b4e7ad84cb22aa4ba31d18c && \ - git clone https://github.com/HawkAaron/warp-transducer deps/warp-transducer && \ - cd deps/warp-transducer && \ - git checkout $COMMIT_SHA && \ - mkdir build && \ - cd build && \ - cmake .. && \ - make VERBOSE=1 && \ - export CUDA_HOME="/usr/local/cuda" && \ - export WARP_RNNT_PATH=`pwd` && \ - export CUDA_TOOLKIT_ROOT_DIR=$CUDA_HOME && \ - export LD_LIBRARY_PATH="$CUDA_HOME/extras/CUPTI/lib64:$LD_LIBRARY_PATH" && \ - export LIBRARY_PATH=$CUDA_HOME/lib64:$LIBRARY_PATH && \ - export LD_LIBRARY_PATH=$CUDA_HOME/lib64:$LD_LIBRARY_PATH && \ - export CFLAGS="-I$CUDA_HOME/include $CFLAGS" && \ - cd ../pytorch_binding && \ - python3 setup.py install --user && \ - rm -rf ../tests test ../tensorflow_binding && \ - cd ../../.. - -WORKDIR /workspace/jasper - -COPY requirements.txt . -RUN pip install --disable-pip-version-check -U -r requirements.txt - -COPY . . diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/LICENSE b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/LICENSE deleted file mode 100644 index 75ee157cd96..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/LICENSE +++ /dev/null @@ -1,204 +0,0 @@ - Except where otherwise noted, the following license applies to all files in this repo. 
- - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. 
Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2019 NVIDIA Corporation - Copyright 2019 Myrtle Software Limited, www.myrtle.ai - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/NOTICE b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/NOTICE deleted file mode 100644 index 7916839bcc4..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/NOTICE +++ /dev/null @@ -1,5 +0,0 @@ -Jasper in PyTorch - -This repository includes source code (in "parts/") from: -* https://github.com/keithito/tacotron and https://github.com/ryanleary/patter licensed under MIT license. - diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/configs/rnnt.toml b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/configs/rnnt.toml deleted file mode 100644 index a4cd1dfb470..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/configs/rnnt.toml +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# Copyright (c) 2019, Myrtle Software Limited. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -model = "RNNT" - -[input] -normalize = "per_feature" -sample_rate = 16000 -window_size = 0.02 -window_stride = 0.01 -window = "hann" -features = 80 -n_fft = 512 -frame_splicing = 3 -dither = 0.00001 -feat_type = "logfbank" -normalize_transcripts = true -trim_silence = true -pad_to = 0 # TODO -max_duration = 16.7 -speed_perturbation = true - - -cutout_rect_regions = 0 -cutout_rect_time = 60 -cutout_rect_freq = 25 - - -cutout_x_regions = 2 -cutout_y_regions = 2 -cutout_x_width = 6 -cutout_y_width = 6 - - -[input_eval] -normalize = "per_feature" -sample_rate = 16000 -window_size = 0.02 -window_stride = 0.01 -window = "hann" -features = 80 -n_fft = 512 -frame_splicing = 3 -dither = 0.00001 -feat_type = "logfbank" -normalize_transcripts = true -trim_silence = true -pad_to = 0 - - -[rnnt] -rnn_type = "lstm" -encoder_n_hidden = 1024 -encoder_pre_rnn_layers = 2 -encoder_stack_time_factor = 2 -encoder_post_rnn_layers = 3 -pred_n_hidden = 320 -pred_rnn_layers = 2 -forget_gate_bias = 1.0 -joint_n_hidden = 512 -dropout=0.32 - - -[labels] -labels = [" ", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "'"] diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/dataset.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/dataset.py deleted file mode 100644 index 7b9036f1c55..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/dataset.py +++ /dev/null @@ -1,159 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -This file contains classes and functions related to data loading -""" -from collections import namedtuple -import torch -import numpy as np -from torch.utils.data import Dataset -from parts.manifest import Manifest -from parts.features import WaveformFeaturizer - - -def seq_collate_fn(batch): - """batches samples and returns as tensors - Args: - batch : list of samples - Returns - batches of tensors - """ - audio_lengths = torch.LongTensor([sample.waveform.size(0) - for sample in batch]) - transcript_lengths = torch.LongTensor([sample.transcript.size(0) - for sample in batch]) - permute_indices = torch.argsort(audio_lengths, descending=True) - - audio_lengths = audio_lengths[permute_indices] - transcript_lengths = transcript_lengths[permute_indices] - padded_audio_signals = torch.nn.utils.rnn.pad_sequence( - [batch[i].waveform for i in permute_indices], - batch_first=True - ) - transcript_list = [batch[i].transcript - for i in permute_indices] - packed_transcripts = torch.nn.utils.rnn.pack_sequence(transcript_list, - enforce_sorted=False) - - # TODO: Don't I need to stop grad at some point now? 
- return (padded_audio_signals, audio_lengths, transcript_list, - packed_transcripts, transcript_lengths) - - -class AudioToTextDataLayer: - """Data layer with data loader - """ - - def __init__(self, **kwargs): - featurizer_config = kwargs['featurizer_config'] - pad_to_max = kwargs.get('pad_to_max', False) - perturb_config = kwargs.get('perturb_config', None) - manifest_filepath = kwargs['manifest_filepath'] - dataset_dir = kwargs['dataset_dir'] - labels = kwargs['labels'] - batch_size = kwargs['batch_size'] - drop_last = kwargs.get('drop_last', False) - shuffle = kwargs.get('shuffle', True) - min_duration = featurizer_config.get('min_duration', 0.1) - max_duration = featurizer_config.get('max_duration', None) - normalize_transcripts = kwargs.get('normalize_transcripts', True) - trim_silence = kwargs.get('trim_silence', False) - sampler_type = kwargs.get('sampler', 'default') - speed_perturbation = featurizer_config.get('speed_perturbation', False) - sort_by_duration = sampler_type == 'bucket' - self._featurizer = WaveformFeaturizer.from_config( - featurizer_config, perturbation_configs=perturb_config) - self._dataset = AudioDataset( - dataset_dir=dataset_dir, - manifest_filepath=manifest_filepath, - labels=labels, blank_index=len(labels), - sort_by_duration=sort_by_duration, - pad_to_max=pad_to_max, - featurizer=self._featurizer, max_duration=max_duration, - min_duration=min_duration, normalize=normalize_transcripts, - trim=trim_silence, speed_perturbation=speed_perturbation) - - print('sort_by_duration', sort_by_duration) - - self._dataloader = torch.utils.data.DataLoader( - dataset=self._dataset, - batch_size=batch_size, - collate_fn=lambda b: seq_collate_fn(b), - drop_last=drop_last, - shuffle=shuffle, - num_workers=0, - pin_memory=True, - sampler=None - ) - - def __len__(self): - return len(self._dataset) - - @property - def data_iterator(self): - return self._dataloader - - -class AudioDataset(Dataset): - def __init__(self, dataset_dir, manifest_filepath, labels, featurizer, max_duration=None, pad_to_max=False, - min_duration=None, blank_index=0, max_utts=0, normalize=True, sort_by_duration=False, - trim=False, speed_perturbation=False): - """Dataset that loads tensors via a json file containing paths to audio files, transcripts, and durations - (in seconds). Each entry is a different audio sample. - Args: - dataset_dir: absolute path to dataset folder - manifest_filepath: relative path from dataset folder to manifest json as described above. - labels: String containing all the possible characters to map to - featurizer: Initialized featurizer class that converts paths of audio to feature tensors - max_duration: If audio exceeds this length, do not include in dataset - min_duration: If audio is less than this length, do not include in dataset - pad_to_max: if specified input sequences into dnn model will be padded to max_duration - blank_index: blank index for ctc loss / decoder - max_utts: Limit number of utterances - normalize: whether to normalize transcript text - sort_by_duration: whether or not to sort sequences by increasing duration - trim: if specified trims leading and trailing silence from an audio signal. 
- speed_perturbation: specify if using data contains speed perburbation - """ - m_paths = [manifest_filepath] - self.manifest = Manifest(dataset_dir, m_paths, labels, blank_index, pad_to_max=pad_to_max, - max_duration=max_duration, - sort_by_duration=sort_by_duration, - min_duration=min_duration, max_utts=max_utts, - normalize=normalize, speed_perturbation=speed_perturbation) - self.featurizer = featurizer - self.blank_index = blank_index - self.trim = trim - print( - "Dataset loaded with {0:.2f} hours. Filtered {1:.2f} hours.".format( - self.manifest.duration / 3600, - self.manifest.filtered_duration / 3600)) - - def __getitem__(self, index): - sample = self.manifest[index] - rn_indx = np.random.randint(len(sample['audio_filepath'])) - duration = sample['audio_duration'][rn_indx] if 'audio_duration' in sample else 0 - offset = sample['offset'] if 'offset' in sample else 0 - features = self.featurizer.process(sample['audio_filepath'][rn_indx], - offset=offset, duration=duration, - trim=self.trim) - - AudioSample = namedtuple('AudioSample', ['waveform', - 'transcript']) - return AudioSample(features, - torch.LongTensor(sample["transcript"])) - - def __len__(self): - return len(self.manifest) diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/decoders.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/decoders.py deleted file mode 100644 index 9ac9fa61aaf..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/decoders.py +++ /dev/null @@ -1,405 +0,0 @@ -# Copyright (c) 2019, Myrtle Software Limited. All rights reserved. -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import List, Optional, Tuple - -import torch -import time - -import torch.nn.functional as F -from model_separable_rnnt import label_collate - -class ScriptGreedyDecoder(torch.nn.Module): - """A greedy transducer decoder. - - Args: - blank_symbol: See `Decoder`. - model: Model to use for prediction. - max_symbols_per_step: The maximum number of symbols that can be added - to a sequence in a single time step; if set to None then there is - no limit. - cutoff_prob: Skip to next step in search if current highest character - probability is less than this. - """ - - def __init__(self, blank_index, model, max_symbols_per_step=30): - super().__init__() - #assert isinstance(model, torch.jit.ScriptModule) - # assert not model.training - self.eval() - self._model = model - self._blank_id = blank_index - self._SOS = -1 - assert max_symbols_per_step > 0 - self._max_symbols_per_step = max_symbols_per_step - - @torch.jit.export - def forward_dec_single_batch(self, logits: torch.Tensor, logits_lens: torch.Tensor, int8, bf16) -> List[List[int]]: - """Returns a list of sentences given an input batch. - - Args: - logits: logits produced by encoder - logits_lens: length of each logits - - Returns: - list containing batch number of sentences (strings). 
- """ - import intel_pytorch_extension as ipex - logits = logits.to(ipex.DEVICE) - if int8: - if bf16: - # enable bf16 for decoder part - ipex.enable_auto_mixed_precision(mixed_dtype=torch.bfloat16) - else: - # the case of int8 = False and bf16 = True had already processed in higher level - pass - - # inseq: TxBxF - logitlen = logits_lens[0] - sentence = self._greedy_decode(logits, logitlen) - - return [sentence] - - @torch.jit.export - def forward_single_batch(self, x: torch.Tensor, out_lens: torch.Tensor, conf, int8, bf16, run_mode="inference") -> Tuple[torch.Tensor, torch.Tensor, List[List[int]]]: - """Returns a list of sentences given an input batch. - - Args: - x: A tensor of size (batch, channels, features, seq_len) - TODO was (seq_len, batch, in_features). - out_lens: list of int representing the length of each sequence - output sequence. - - Returns: - list containing batch number of sentences (strings). - """ - # Apply optional preprocessing - - t0 = time.time() - if int8: - import intel_pytorch_extension as ipex - with ipex.AutoMixPrecision(conf, running_mode=run_mode): - logits, logits_lens = self._model.encoder(x, out_lens) - - # TODO: support directly reorder data from int8 to bf16 - # This is an workaround here to transfer logits to cpu - # to reorder data from int8 to fp32 - logits = logits.to("cpu") - logits = logits.to(ipex.DEVICE) - - if bf16: - # enable bf16 for decoder part - ipex.enable_auto_mixed_precision(mixed_dtype=torch.bfloat16) - else: - # the case of int8 = False and bf16 = True had already processed in higher level - logits, logits_lens = self._model.encoder(x, out_lens) - - #os.environ['OMP_NUM_THREADS'] = '1' - t1 = time.time() - # inseq: TxBxF - logitlen = logits_lens[0] - sentence = self._greedy_decode(logits, logitlen) - t2 = time.time() - - return logits, logits_lens, [sentence], t1-t0, t2-t1 - - def _greedy_decode(self, x: torch.Tensor, out_len: torch.Tensor) -> List[int]: - hidden: Optional[Tuple[torch.Tensor, torch.Tensor]] = None - label: List[int] = [] - timesteps = int(out_len.item()) - last_symb = self._SOS - time_idx = 0 - x.unsqueeze_(0) - - symb_added = 0 - while 1: - g, hidden_prime = self._pred_step(last_symb, hidden) - logp = self._joint_step_nolog(x[:, :, time_idx, :], g) - - # get index k, of max prob - _, k = logp.max(0) - k = k.item() - - if k == self._blank_id or symb_added >= self._max_symbols_per_step: - time_idx += 1 - if time_idx >= timesteps: - break - symb_added = 0 - else: - last_symb = k - label.append(k) - symb_added += 1 - hidden = hidden_prime - - return label - - """ - def _greedy_decode_origin(self, x: torch.Tensor, out_len: torch.Tensor) -> List[int]: - hidden: Optional[Tuple[torch.Tensor, torch.Tensor]] = None - label: List[int] = [] - for time_idx in range(int(out_len.item())): - f = x[:, time_idx, :].unsqueeze_(0) - - not_blank = True - symbols_added = 0 - - while not_blank and symbols_added < self._max_symbols_per_step: - g, hidden_prime = self._pred_step( - self._get_last_symb(label), - hidden - ) - logp = self._joint_step(f, g, log_normalize=False)[0, :] - - # get index k, of max prob - v, k = logp.max(0) - k = k.item() - - if k == self._blank_id: - not_blank = False - else: - label.append(k) - hidden = hidden_prime - symbols_added += 1 - - return label - """ - - def _pred_step(self, label: int, hidden: Optional[Tuple[torch.Tensor, torch.Tensor]]) -> Tuple[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]: - #if label > self._blank_id: - # label -= 1 - label = torch.tensor([[label]], dtype=torch.int64) - result = 
self._model.prediction(label, hidden) - return result - - def _joint_step_nolog(self, enc: torch.Tensor, pred: torch.Tensor) -> torch.Tensor: - return self._model.joint(enc, pred)[0, 0, 0, :] - - def _joint_step(self, enc: torch.Tensor, pred: torch.Tensor, log_normalize: bool=False) -> torch.Tensor: - logits = self._model.joint(enc, pred)[:, 0, 0, :] - if not log_normalize: - return logits - - probs = F.log_softmax(logits, dim=len(logits.shape) - 1) - - return probs - - def _get_last_symb(self, labels: List[int]) -> int: - return self._SOS if len(labels) == 0 else labels[-1] - - @torch.jit.export - def forward_enc_batch(self, x: torch.Tensor, out_lens: torch.Tensor, conf, int8, run_mode="inference") -> Tuple[torch.Tensor, torch.Tensor]: - """Returns a list of sentences given an input batch. - - Args: - x: A tensor of size (batch, channels, features, seq_len) - TODO was (seq_len, batch, in_features). - out_lens: list of int representing the length of each sequence - output sequence. - - Returns: - logits and logits lens - """ - # Apply optional preprocessing - # int8 encoder + bf16 decoder - if int8: - import intel_pytorch_extension as ipex - with ipex.AutoMixPrecision(conf, running_mode=run_mode): - logits, logits_lens = self._model.encoder(x, out_lens) - - # TODO: support directly reorder data from int8 to bf16 - # This is an workaround here to transfer logits to cpu - # to reorder data from int8 to fp32 - logits = logits.to("cpu") - else: - # the case of int8 = False and bf16 = True had already processed in higher level - logits, logits_lens = self._model.encoder(x, out_lens) - - return logits, logits_lens - - @torch.jit.export - def forward_dec_batch(self, logits: torch.Tensor, logits_lens: torch.Tensor, int8, bf16) -> Tuple[List[List[int]], float]: - """Returns a list of sentences given an input batch. - - Args: - logits, logits_lens: encoder input - - Returns: - list containing batch number of sentences (strings). - """ - # Apply optional preprocessing - # int8 encoder + bf16 decoder - import intel_pytorch_extension as ipex - logits = logits.to(ipex.DEVICE) - if int8: - if bf16: - # enable bf16 for decoder part - ipex.enable_auto_mixed_precision(mixed_dtype=torch.bfloat16) - else: - # the case of int8 = False and bf16 = True had already processed in higher level - pass - - sentences = self._greedy_decode_batch(logits, logits_lens) - - return sentences - - @torch.jit.export - def forward_batch(self, x: torch.Tensor, out_lens: torch.Tensor, conf, int8, bf16, run_mode="inference") -> Tuple[torch.Tensor, torch.Tensor, List[List[int]]]: - """Returns a list of sentences given an input batch. - - Args: - x: A tensor of size (batch, channels, features, seq_len) - TODO was (seq_len, batch, in_features). - out_lens: list of int representing the length of each sequence - output sequence. - - Returns: - list containing batch number of sentences (strings). 
- """ - """ - # Apply optional preprocessing - # int8 encoder + bf16 decoder - t0 = time.time() - if int8: - import intel_pytorch_extension as ipex - with ipex.AutoMixPrecision(conf, running_mode=run_mode): - logits, logits_lens = self._model.encoder(x, out_lens) - - # TODO: support directly reorder data from int8 to bf16 - # This is an workaround here to transfer logits to cpu - # to reorder data from int8 to fp32 - logits = logits.to("cpu") - logits = logits.to(ipex.DEVICE) - - if bf16: - # enable bf16 for decoder part - ipex.enable_auto_mixed_precision(mixed_dtype=torch.bfloat16) - else: - # the case of int8 = False and bf16 = True had already processed in higher level - logits, logits_lens = self._model.encoder(x, out_lens) - - t1 = time.time() - sentences = self._greedy_decode_batch(logits, logits_lens) - t2 = time.time() - - return logits, logits_lens, sentences, t1-t0, t2-t1 - """ - t0 = time.time() - logits, logits_lens = self.forward_enc_batch(x, out_lens, conf, int8, bf16, run_mode) - t1 = time.time() - sentences = self.forward_dec_batch(logits, logits_lens, int8, bf16) - t2 = time.time() - return logits, logits_lens, sentences, t1-t0, t2-t1 - - def count_nonzero(self, x: torch.Tensor) -> int: - return x.nonzero().shape[0] - - def _greedy_decode_batch(self, x: torch.Tensor, out_lens: torch.Tensor) -> List[List[int]]: - batch_size = x.size(0) - hidden: Optional[Tuple[torch.Tensor, torch.Tensor]] = None - max_len = out_lens.max().item() - max_lens = torch.tensor([max_len-1] * batch_size, dtype=torch.int64) - # pos 0 of label_tensor is set to _SOS to simplify computation - # real label start from pos 1 - label_tensor = torch.tensor([self._SOS]).repeat(batch_size, max_len*self._max_symbols_per_step) # (B, T/2*max_symbols_per_step) - # (row, col) of current labels end - label_row = torch.tensor(list(range(batch_size))) - label_col = torch.tensor([0] * batch_size) - # this list will be used to return labels to caller - label_copy = [0] * batch_size - # initially time_idx is 0 for all input - # then advance time_idx for each 'track' when needed and update f - f = x[:, 0, :].unsqueeze(1) - time_idxs = torch.tensor([0] * batch_size, dtype=torch.int64) - - not_blank = True - blank_vec = torch.tensor([0] * batch_size, dtype=torch.int) - symbols_added = torch.tensor([0] * batch_size, dtype=torch.int) - - while True: - g, hidden_prime = self._pred_step_batch( - label_tensor.gather(1, label_col.unsqueeze(1)), - hidden, - batch_size - ) - logp = self._joint_step_batch(f, g, log_normalize=False) - - # get index k, of max prob - v, k = logp.max(1) - - # if any of the output is blank, pull in the next time_idx for next f - # tmp_blank_vec is the vect used to mix new hidden state with previous hidden state - # blank_vec is the baseline of blank_vec, it turns to blank only when run out of time_idx - blankness = k.eq(self._blank_id) - time_idxs = time_idxs + blankness - symbols_added *= blankness.logical_not() - # it doesn't matter if blank_vec is update now or later, - # tmp_blank_vec always get correct value for this round - blank_vec = time_idxs.ge(out_lens) - tmp_blank_vec = blank_vec.logical_or(blankness) - - if self.count_nonzero(blank_vec) == batch_size: - # all time_idxs processed, stop - break - else: - # If for sample blankid already encountered, then stop - # update hidden values until input from next time step. 
- # So we would mix value of hidden and hidden_prime together, - # keep values in hidden where blank_vec[i] is true - if hidden == None: - hidden = [torch.zeros_like(hidden_prime[0]), torch.zeros_like(hidden_prime[1])] - - idx = (tmp_blank_vec.eq(0)).nonzero(as_tuple=True)[0] - hidden[0][:, idx, :] = hidden_prime[0][:, idx, :] - hidden[1][:, idx, :] = hidden_prime[1][:, idx, :] - - label_col += tmp_blank_vec.eq(False) - label_tensor.index_put_([label_row, label_col], (k-self._SOS)*tmp_blank_vec.eq(False), accumulate=True) - - symbols_added += tmp_blank_vec.eq(False) - sym_ge_vec = symbols_added.ge(self._max_symbols_per_step) - if sym_ge_vec.count_nonzero() != 0: - time_idxs += sym_ge_vec - blankness.logical_or(sym_ge_vec) - symbols_added *= symbols_added.lt(self._max_symbols_per_step) - - # update f if necessary - # if at least one id in blankness is blank them time_idx is updated - # and we need to update f accordingly - if self.count_nonzero(blankness) > 0: - fetch_time_idxs = time_idxs.min(max_lens) - # select tensor along second dim of x - # implement something like --> f = x[:, :, fetch_time_idxs, :] - # for example, if all elements in fetch_time_idxs = n, then - # this is equivelent to f = x[:, :, n, :] - f = x[list(range(batch_size)), fetch_time_idxs, :].unsqueeze(1) - for i in range(batch_size): - label_copy[i]=label_tensor[i][1:label_col[i]+1].tolist() - return label_copy - - def _pred_step_batch(self, label, hidden: Optional[Tuple[torch.Tensor, torch.Tensor]], batch_size) -> Tuple[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]: - # not really need this line, _blank_id is the last id of dict - #label = label - label.gt(self._blank_id).int() - result = self._model.prediction(label, hidden, batch_size) - return result - - def _joint_step_batch(self, enc: torch.Tensor, pred: torch.Tensor, log_normalize: bool=False) -> torch.Tensor: - logits = self._model.joint(enc, pred) - logits = logits[:, 0, 0, :] - if not log_normalize: - return logits - - probs = F.log_softmax(logits, dim=len(logits.shape) - 1) - - return probs diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/helpers.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/helpers.py deleted file mode 100644 index cfe3b66f3c8..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/helpers.py +++ /dev/null @@ -1,123 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# Copyright (c) 2019, Myrtle Software Limited. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from enum import Enum -from metrics import word_error_rate - - -class Optimization(Enum): - """Various levels of Optimization. 
- WARNING: This might have effect on model accuracy.""" - nothing = 0 - mxprO0 = 1 - mxprO1 = 2 - mxprO2 = 3 - mxprO3 = 4 - - -AmpOptimizations = {Optimization.mxprO0: "O0", - Optimization.mxprO1: "O1", - Optimization.mxprO2: "O2", - Optimization.mxprO3: "O3"} - - -def add_blank_label(labels): - if not isinstance(labels, list): - raise ValueError("labels must be a list of symbols") - labels.append("") - return labels - - -def __rnnt_decoder_predictions_tensor(tensor, labels): - """ - Takes output of greedy rnnt decoder and converts to strings. - Args: - tensor: model output tensor - label: A list of labels - Returns: - prediction - """ - hypotheses = [] - labels_map = dict([(i, labels[i]) for i in range(len(labels))]) - # iterate over batch - for ind in range(len(tensor)): - hypothesis = ''.join([labels_map[c] for c in tensor[ind]]) - hypotheses.append(hypothesis) - return hypotheses - - -def __gather_predictions(predictions_list: list, labels: list) -> list: - results = [] - for prediction in predictions_list: - results += __rnnt_decoder_predictions_tensor(prediction, labels=labels) - return results - - -def __gather_transcripts(transcript_list: list, transcript_len_list: list, - labels: list) -> list: - results = [] - labels_map = dict([(i, labels[i]) for i in range(len(labels))]) - for i, t in enumerate(transcript_list): - target = t.numpy().tolist() - reference = ''.join([labels_map[c] for c in target]) - results.append(reference) - return results - - -def process_evaluation_batch(tensors: dict, global_vars: dict, labels: list): - """ - Processes results of an iteration and saves it in global_vars - Args: - tensors: dictionary with results of an evaluation iteration, e.g. loss, predictions, transcript, and output - global_vars: dictionary where processes results of iteration are saved - labels: A list of labels - """ - for kv, v in tensors.items(): - if kv.startswith('predictions'): - global_vars['predictions'] += __gather_predictions( - v, labels=labels) - elif kv.startswith('transcript_length'): - transcript_len_list = v - elif kv.startswith('transcript'): - transcript_list = v - - global_vars['transcripts'] += __gather_transcripts(transcript_list, - transcript_len_list, - labels=labels) - - -def process_evaluation_epoch(global_vars: dict, tag=None): - """ - Processes results from each worker at the end of evaluation and combine to final result - Args: - global_vars: dictionary containing information of entire evaluation - Return: - wer: final word error rate - loss: final loss - """ - hypotheses = global_vars['predictions'] - references = global_vars['transcripts'] - - wer, scores, num_words = word_error_rate( - hypotheses=hypotheses, references=references) - return wer - - -def print_dict(d): - maxLen = max([len(ii) for ii in d.keys()]) - fmtString = '\t%' + str(maxLen) + 's : %s' - print('Arguments:') - for keyPair in sorted(d.items()): - print(fmtString % keyPair) diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/metrics.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/metrics.py deleted file mode 100644 index 5426e37237a..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/metrics.py +++ /dev/null @@ -1,67 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import List - - -def __levenshtein(a: List, b: List) -> int: - """Calculates the Levenshtein distance between a and b. - """ - n, m = len(a), len(b) - if n > m: - # Make sure n <= m, to use O(min(n,m)) space - a, b = b, a - n, m = m, n - - current = list(range(n + 1)) - for i in range(1, m + 1): - previous, current = current, [i] + [0] * n - for j in range(1, n + 1): - add, delete = previous[j] + 1, current[j - 1] + 1 - change = previous[j - 1] - if a[j - 1] != b[i - 1]: - change = change + 1 - current[j] = min(add, delete, change) - - return current[n] - - -def word_error_rate(hypotheses: List[str], references: List[str]) -> float: - """ - Computes Average Word Error rate between two texts represented as - corresponding lists of string. Hypotheses and references must have same length. - - Args: - hypotheses: list of hypotheses - references: list of references - - Returns: - (float) average word error rate - """ - scores = 0 - words = 0 - if len(hypotheses) != len(references): - raise ValueError("In word error rate calculation, hypotheses and reference" - " lists must have the same number of elements. But I got:" - "{0} and {1} correspondingly".format(len(hypotheses), len(references))) - for h, r in zip(hypotheses, references): - h_list = h.split() - r_list = r.split() - words += len(r_list) - scores += __levenshtein(h_list, r_list) - if words != 0: - wer = (1.0 * scores) / words - else: - wer = float('inf') - return wer, scores, words diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/model_separable_rnnt.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/model_separable_rnnt.py deleted file mode 100644 index f0ef252130c..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/model_separable_rnnt.py +++ /dev/null @@ -1,216 +0,0 @@ -from typing import Optional, Tuple - -import numpy as np -import torch - -from rnn import rnn -from rnn import StackTime - - -class RNNT(torch.nn.Module): - def __init__(self, rnnt=None, num_classes=1, **kwargs): - super().__init__() - if kwargs.get("no_featurizer", False): - in_features = kwargs.get("in_features") - else: - feat_config = kwargs.get("feature_config") - # This may be useful in the future, for MLPerf - # configuration. 
- in_features = feat_config['features'] * \ - feat_config.get("frame_splicing", 1) - - self.encoder = Encoder(in_features, - rnnt["encoder_n_hidden"], - rnnt["encoder_pre_rnn_layers"], - rnnt["encoder_post_rnn_layers"], - rnnt["forget_gate_bias"], - None if "norm" not in rnnt else rnnt["norm"], - rnnt["rnn_type"], - rnnt["encoder_stack_time_factor"], - rnnt["dropout"], - ) - - self.prediction = Prediction( - num_classes, - rnnt["pred_n_hidden"], - rnnt["pred_rnn_layers"], - rnnt["forget_gate_bias"], - None if "norm" not in rnnt else rnnt["norm"], - rnnt["rnn_type"], - rnnt["dropout"], - -1, #_SOS - ) - - self.joint = Joint( - num_classes, - rnnt["pred_n_hidden"], - rnnt["encoder_n_hidden"], - rnnt["joint_n_hidden"], - rnnt["dropout"], - ) - - def forward(self, x_padded: torch.Tensor, x_lens: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: - return self.encoder(x_padded, x_lens) - - -class Encoder(torch.nn.Module): - def __init__(self, in_features, encoder_n_hidden, - encoder_pre_rnn_layers, encoder_post_rnn_layers, - forget_gate_bias, norm, rnn_type, encoder_stack_time_factor, - dropout): - super().__init__() - self.pre_rnn = rnn( - rnn=rnn_type, - input_size=in_features, - hidden_size=encoder_n_hidden, - num_layers=encoder_pre_rnn_layers, - norm=norm, - forget_gate_bias=forget_gate_bias, - dropout=dropout, - ) - self.stack_time = StackTime(factor=encoder_stack_time_factor) - self.post_rnn = rnn( - rnn=rnn_type, - input_size=encoder_stack_time_factor * encoder_n_hidden, - hidden_size=encoder_n_hidden, - num_layers=encoder_post_rnn_layers, - norm=norm, - forget_gate_bias=forget_gate_bias, - norm_first_rnn=True, - dropout=dropout, - ) - - def forward(self, x_padded: torch.Tensor, x_lens: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: - x_padded, _ = self.pre_rnn(x_padded, None) - x_padded, x_lens = self.stack_time(x_padded, x_lens) - # (T, B, H) - x_padded, _ = self.post_rnn(x_padded, None) - # (B, T, H) - x_padded = x_padded.transpose_(0, 1) - return x_padded, x_lens - -class Prediction(torch.nn.Module): - def __init__(self, vocab_size, n_hidden, pred_rnn_layers, - forget_gate_bias, norm, rnn_type, dropout, sos_val): - super().__init__() - self.embed = torch.nn.Embedding(vocab_size - 1, n_hidden) - self.n_hidden = n_hidden - self.dec_rnn = rnn( - rnn=rnn_type, - input_size=n_hidden, - hidden_size=n_hidden, - num_layers=pred_rnn_layers, - norm=norm, - forget_gate_bias=forget_gate_bias, - dropout=dropout, - ) - self._SOS = sos_val - - def forward(self, y: torch.Tensor, - state: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, - b: int = 1) -> Tuple[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]: - """ - B - batch size - U - label length - H - Hidden dimension size - L - Number of decoder layers = 2 - - Args: - y: (B, U) - - Returns: - Tuple (g, hid) where: - g: (B, U + 1, H) - hid: (h, c) where h is the final sequence hidden state and c is - the final cell state: - h (tensor), shape (L, B, H) - c (tensor), shape (L, B, H) - """ - # SOS hack, there is no SOS, and SOS should as if embedding give 0.0 - # So identify SOS and fill lookup result with 0.0 - # If embedding table contains SOS token this would save a lot of - # trouble - y_mask = y.eq(self._SOS) - y.masked_fill_(y_mask, 0) - y = self.embed(y) - y.masked_fill_(y_mask.unsqueeze(2), 0.0) - - # if state is None: - # batch = y.size(0) - # state = [ - # (torch.zeros(batch, self.pred_n_hidden, dtype=y.dtype, device=y.device), - # torch.zeros(batch, self.pred_n_hidden, dtype=y.dtype, device=y.device)) - # for _ in 
range(self.pred_rnn_layers) - # ] - - y = y.transpose_(0, 1) # .contiguous() # (U + 1, B, H) - g, hid = self.dec_rnn(y, state) - g = g.transpose_(0, 1) # .contiguous() # (B, U + 1, H) - # del y, state - return g, hid - -class Joint(torch.nn.Module): - def __init__(self, vocab_size, pred_n_hidden, enc_n_hidden, - joint_n_hidden, dropout): - super().__init__() - layers = [ - torch.nn.Linear(pred_n_hidden + enc_n_hidden, joint_n_hidden), - torch.nn.ReLU(), - ] + ([torch.nn.Dropout(p=dropout), ] if dropout else []) + [ - torch.nn.Linear(joint_n_hidden, vocab_size) - ] - self.net = torch.nn.Sequential( - *layers - ) - - def forward(self, f: torch.Tensor, g: torch.Tensor): - """ - f should be shape (B, T, H) - g should be shape (B, U + 1, H) - - returns: - logits of shape (B, T, U, K + 1) - """ - # Combine the input states and the output states - B, T, H = f.shape - B, U_, H2 = g.shape - - f = f.unsqueeze(dim=2) # (B, T, 1, H) - f = f.expand((B, T, U_, H)) - - g = g.unsqueeze(dim=1) # (B, 1, U + 1, H) - g = g.expand((B, T, U_, H2)) - - inp = torch.cat([f, g], dim=3) # (B, T, U, 2H) - res = self.net(inp) - # del f, g, inp - return res - -def label_collate(labels): - """Collates the label inputs for the rnn-t prediction network. - - If `labels` is already in torch.Tensor form this is a no-op. - - Args: - labels: A torch.Tensor List of label indexes or a torch.Tensor. - - Returns: - A padded torch.Tensor of shape (batch, max_seq_len). - """ - - if isinstance(labels, torch.Tensor): - return labels.type(torch.int64) - if not isinstance(labels, (list, tuple)): - raise ValueError( - f"`labels` should be a list or tensor not {type(labels)}" - ) - - batch_size = len(labels) - max_len = max(len(l) for l in labels) - - cat_labels = np.full((batch_size, max_len), fill_value=0.0, dtype=np.int32) - for e, l in enumerate(labels): - cat_labels[e, :len(l)] = l - labels = torch.LongTensor(cat_labels) - - return labels diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/parts/features.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/parts/features.py deleted file mode 100644 index 7b839dfa47a..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/parts/features.py +++ /dev/null @@ -1,260 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# Copyright (c) 2019, Myrtle Software Limited. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from typing import Tuple - -import torch -import torch.nn as nn -import math -import librosa -from .segment import AudioSegment - - -class WaveformFeaturizer(object): - def __init__(self, input_cfg): - self.cfg = input_cfg - - def process(self, file_path, offset=0, duration=0, trim=False): - audio = AudioSegment.from_file(file_path, - target_sr=self.cfg['sample_rate'], - int_values=self.cfg.get( - 'int_values', False), - offset=offset, duration=duration, trim=trim) - return self.process_segment(audio) - - def process_segment(self, audio_segment): - return torch.tensor(audio_segment.samples, dtype=torch.float) - - @classmethod - def from_config(cls, input_config, perturbation_configs=None): - return cls(input_config) - - -constant = 1e-5 - - -def normalize_batch(x, seq_len, normalize_type): - if normalize_type == "per_feature": - x_mean = torch.zeros((seq_len.shape[0], x.shape[1]), dtype=x.dtype, - device=x.device) - x_std = torch.zeros((seq_len.shape[0], x.shape[1]), dtype=x.dtype, - device=x.device) - for i in range(x.shape[0]): - x_mean[i, :] = x[i, :, :seq_len[i]].mean(dim=1) - x_std[i, :] = x[i, :, :seq_len[i]].std(dim=1) - # make sure x_std is not zero - x_std += constant - return (x - x_mean.unsqueeze(2)) / x_std.unsqueeze(2) - elif normalize_type == "all_features": - x_mean = torch.zeros(seq_len.shape, dtype=x.dtype, device=x.device) - x_std = torch.zeros(seq_len.shape, dtype=x.dtype, device=x.device) - for i in range(x.shape[0]): - x_mean[i] = x[i, :, :seq_len[i].item()].mean() - x_std[i] = x[i, :, :seq_len[i].item()].std() - # make sure x_std is not zero - x_std += constant - return (x - x_mean.view(-1, 1, 1)) / x_std.view(-1, 1, 1) - else: - return x - - -def splice_frames(x, frame_splicing): - """ Stacks frames together across feature dim - - input is batch_size, feature_dim, num_frames - output is batch_size, feature_dim*frame_splicing, num_frames - - """ - seq = [x] - for n in range(1, frame_splicing): - tmp = torch.zeros_like(x) - tmp[:, :, :-n] = x[:, :, n:] - seq.append(tmp) - return torch.cat(seq, dim=1)[:, :, ::frame_splicing] - - -class FilterbankFeatures(nn.Module): - def __init__(self, sample_rate=8000, window_size=0.02, window_stride=0.01, - window="hamming", normalize="per_feature", n_fft=None, - preemph=0.97, - nfilt=64, lowfreq=0, highfreq=None, log=True, dither=constant, - pad_to=8, - max_duration=16.7, - frame_splicing=1): - super(FilterbankFeatures, self).__init__() -# print("PADDING: {}".format(pad_to)) - - torch_windows = { - 'hann': torch.hann_window, - 'hamming': torch.hamming_window, - 'blackman': torch.blackman_window, - 'bartlett': torch.bartlett_window, - 'none': None, - } - - self.win_length = int(sample_rate * window_size) # frame size - self.hop_length = int(sample_rate * window_stride) - self.n_fft = n_fft or 2 ** math.ceil(math.log2(self.win_length)) - - self.normalize = normalize - self.log = log - self.dither = dither - self.frame_splicing = frame_splicing - self.nfilt = nfilt - self.preemph = preemph - self.pad_to = pad_to - # For now, always enable this. 
- # See https://docs.google.com/presentation/d/1IVC3J-pHB-ipJpKsJox_SqmDHYdkIaoCXTbKmJmV2-I/edit?usp=sharing for elaboration - self.use_deterministic_dithering = True - highfreq = highfreq or sample_rate / 2 - window_fn = torch_windows.get(window, None) - window_tensor = window_fn(self.win_length, - periodic=False) if window_fn else None - filterbanks = torch.tensor( - librosa.filters.mel(sample_rate, self.n_fft, n_mels=nfilt, fmin=lowfreq, - fmax=highfreq), dtype=torch.float).unsqueeze(0) - # self.fb = filterbanks - # self.window = window_tensor - self.register_buffer("fb", filterbanks) - self.register_buffer("window", window_tensor) - # Calculate maximum sequence length (# frames) - max_length = 1 + math.ceil( - (max_duration * sample_rate - self.win_length) / self.hop_length - ) - max_pad = 16 - (max_length % 16) - self.max_length = max_length + max_pad - - def get_seq_len(self, seq_len): - seq_len = (seq_len + self.hop_length - 1) // self.hop_length - seq_len = (seq_len + self.frame_splicing - 1) // self.frame_splicing - return seq_len - - @torch.no_grad() - def forward(self, inp: Tuple[torch.Tensor, torch.Tensor]) -> torch.Tensor: - x, seq_len = inp - - dtype = x.dtype - - seq_len = self.get_seq_len(seq_len) - - # dither - if self.dither > 0 and not self.use_deterministic_dithering: - x += self.dither * torch.randn_like(x) - - # do preemphasis - # Ideally, we would mask immediately after this... Ugh :( - if self.preemph is not None: - x = torch.cat((x[:, 0].unsqueeze(1), x[:, 1:] - self.preemph * x[:, :-1]), - dim=1) - - # do stft - x = torch.stft(x, n_fft=self.n_fft, hop_length=self.hop_length, - win_length=self.win_length, - center=True, window=self.window.to(dtype=torch.float), - return_complex=False) - - # get power spectrum - x = x.pow(2).sum(-1) - - if self.dither > 0 and self.use_deterministic_dithering: - x = x + self.dither ** 2 - # dot with filterbank energies - x = torch.matmul(self.fb.to(x.dtype), x) - - # log features if required - if self.log: - x = torch.log(x + 1e-20) - - # frame splicing if required - if self.frame_splicing > 1: - seq = [x] - for n in range(1, self.frame_splicing): - tmp = torch.zeros_like(x) - tmp[:, :, :-n] = x[:, :, n:] - seq.append(tmp) - x = torch.cat(seq, dim=1)[:, :, ::self.frame_splicing] - - # normalize if required - constant = 1e-5 - if self.normalize == "per_feature": - x_mean = torch.zeros((seq_len.shape[0], x.shape[1]), dtype=x.dtype, - device=x.device) - x_std = torch.zeros((seq_len.shape[0], x.shape[1]), dtype=x.dtype, - device=x.device) - for i in range(x.shape[0]): - x_mean[i, :] = x[i, :, :seq_len[i]].mean(dim=1) - x_std[i, :] = x[i, :, :seq_len[i]].std(dim=1) - # make sure x_std is not zero - x_std += constant - x = (x - x_mean.unsqueeze(2)) / x_std.unsqueeze(2) - elif self.normalize == "all_features": - x_mean = torch.zeros(seq_len.shape, dtype=x.dtype, device=x.device) - x_std = torch.zeros(seq_len.shape, dtype=x.dtype, device=x.device) - for i in range(x.shape[0]): - x_mean[i] = x[i, :, :seq_len[i].item()].mean() - x_std[i] = x[i, :, :seq_len[i].item()].std() - # make sure x_std is not zero - x_std += constant - x = (x - x_mean.view(-1, 1, 1)) / x_std.view(-1, 1, 1) - else: - x = x - - # Hmmm... They don't do any masking anymore. Seems concerning! 
- - # mask to zero any values beyond seq_len in batch, pad to multiple of `pad_to` (for efficiency) - # max_len = x.size(-1) - x = x[:, :, :seq_len.max()] # rnnt loss requires lengths to match - # mask = torch.arange(max_len).to(seq_len.dtype).to(x.device).expand(x.size(0), - # max_len) >= seq_len.unsqueeze(1) - - # x = x.masked_fill(mask.unsqueeze(1).to(device=x.device), 0) - pad_to = self.pad_to - if pad_to != 0: - raise NotImplementedError() - # if pad_to == "max": - # x = nn.functional.pad(x, (0, self.max_length - x.size(-1))) - # elif pad_to > 0: - # pad_amt = x.size(-1) % pad_to - # if pad_amt != 0: - # x = nn.functional.pad(x, (0, pad_to - pad_amt)) - - return x.to(dtype) - - @classmethod - def from_config(cls, cfg, log=False): - return cls(sample_rate=cfg['sample_rate'], window_size=cfg['window_size'], - window_stride=cfg['window_stride'], n_fft=cfg['n_fft'], - nfilt=cfg['features'], window=cfg['window'], - normalize=cfg['normalize'], - max_duration=cfg.get('max_duration', 16.7), - dither=cfg['dither'], pad_to=cfg.get("pad_to", 0), - frame_splicing=cfg.get("frame_splicing", 1), log=log) - - -class FeatureFactory(object): - featurizers = { - "logfbank": FilterbankFeatures, - "fbank": FilterbankFeatures, - } - - def __init__(self): - pass - - @classmethod - def from_config(cls, cfg): - feat_type = cfg.get('feat_type', "logspect") - featurizer = cls.featurizers[feat_type] - # return featurizer.from_config(cfg, log="log" in cfg['feat_type']) - return featurizer.from_config(cfg, log="log" in feat_type) diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/parts/manifest.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/parts/manifest.py deleted file mode 100644 index fb04c5da882..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/parts/manifest.py +++ /dev/null @@ -1,176 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import json -import string -import os - -from .text import _clean_text - - -def normalize_string(s, labels, table, **unused_kwargs): - """ - Normalizes string. For example: - 'call me at 8:00 pm!' -> 'call me at eight zero pm' - - Args: - s: string to normalize - labels: labels used during model training. 
- - Returns: - Normalized string - """ - - def good_token(token, labels): - s = set(labels) - for t in token: - if t not in s: - return False - return True - - try: - text = _clean_text(s, ["english_cleaners"], table).strip() - return ''.join([t for t in text if good_token(t, labels=labels)]) - except: - print("WARNING: Normalizing {} failed".format(s)) - return None - - -class Manifest(object): - def __init__(self, data_dir, manifest_paths, labels, blank_index, max_duration=None, pad_to_max=False, - min_duration=None, sort_by_duration=False, max_utts=0, - normalize=True, speed_perturbation=False, filter_speed=1.0): - self.labels_map = dict([(labels[i], i) for i in range(len(labels))]) - self.blank_index = blank_index - self.max_duration = max_duration - ids = [] - duration = 0.0 - filtered_duration = 0.0 - - # If removing punctuation, make a list of punctuation to remove - table = None - if normalize: - # Punctuation to remove - punctuation = string.punctuation - punctuation = punctuation.replace("+", "") - punctuation = punctuation.replace("&", "") - # We might also want to consider: - # @ -> at - # -> number, pound, hashtag - # ~ -> tilde - # _ -> underscore - # % -> percent - # If a punctuation symbol is inside our vocab, we do not remove from text - for l in labels: - punctuation = punctuation.replace(l, "") - # Turn all punctuation to whitespace - table = str.maketrans(punctuation, " " * len(punctuation)) - for manifest_path in manifest_paths: - with open(manifest_path, "r", encoding="utf-8") as fh: - a = json.load(fh) - for data in a: - files_and_speeds = data['files'] - - if pad_to_max: - if not speed_perturbation: - min_speed = filter_speed - else: - min_speed = min(x['speed'] - for x in files_and_speeds) - max_duration = self.max_duration * min_speed - - data['duration'] = data['original_duration'] - if min_duration is not None and data['duration'] < min_duration: - filtered_duration += data['duration'] - continue - if max_duration is not None and data['duration'] > max_duration: - filtered_duration += data['duration'] - continue - - # Prune and normalize according to transcript - transcript_text = data[ - 'transcript'] if "transcript" in data else self.load_transcript( - data['text_filepath']) - if normalize: - transcript_text = normalize_string(transcript_text, labels=labels, - table=table) - if not isinstance(transcript_text, str): - print( - "WARNING: Got transcript: {}. It is not a string. 
Dropping data point".format( - transcript_text)) - filtered_duration += data['duration'] - continue - data["transcript"] = self.parse_transcript( - transcript_text) # convert to vocab indices - - if speed_perturbation: - audio_paths = [x['fname'] for x in files_and_speeds] - data['audio_duration'] = [x['duration'] - for x in files_and_speeds] - else: - audio_paths = [ - x['fname'] for x in files_and_speeds if x['speed'] == filter_speed] - data['audio_duration'] = [x['duration'] - for x in files_and_speeds if x['speed'] == filter_speed] - data['audio_filepath'] = [os.path.join( - data_dir, x) for x in audio_paths] - data.pop('files') - data.pop('original_duration') - - ids.append(data) - duration += data['duration'] - - if max_utts > 0 and len(ids) >= max_utts: - print( - 'Stopping parsing %s as max_utts=%d' % (manifest_path, max_utts)) - break - - if sort_by_duration: - ids = sorted(ids, key=lambda x: x['duration']) - self._data = ids - self._size = len(ids) - self._duration = duration - self._filtered_duration = filtered_duration - - def load_transcript(self, transcript_path): - with open(transcript_path, 'r', encoding="utf-8") as transcript_file: - transcript = transcript_file.read().replace('\n', '') - return transcript - - def parse_transcript(self, transcript): - chars = [self.labels_map.get(x, self.blank_index) - for x in list(transcript)] - transcript = list(filter(lambda x: x != self.blank_index, chars)) - return transcript - - def __getitem__(self, item): - return self._data[item] - - def __len__(self): - return self._size - - def __iter__(self): - return iter(self._data) - - @property - def duration(self): - return self._duration - - @property - def filtered_duration(self): - return self._filtered_duration - - @property - def data(self): - return list(self._data) diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/parts/segment.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/parts/segment.py deleted file mode 100644 index 08aa5c6a492..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/parts/segment.py +++ /dev/null @@ -1,170 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import librosa -import soundfile as sf - - -class AudioSegment(object): - """Monaural audio segment abstraction. - :param samples: Audio samples [num_samples x num_channels]. - :type samples: ndarray.float32 - :param sample_rate: Audio sample rate. - :type sample_rate: int - :raises TypeError: If the sample data type is not float or int. - """ - - def __init__(self, samples, sample_rate, target_sr=None, trim=False, - trim_db=60): - """Create audio segment from samples. - Samples are convert float32 internally, with int scaled to [-1, 1]. 
- """ - samples = self._convert_samples_to_float32(samples) - if target_sr is not None and target_sr != sample_rate: - samples = librosa.core.resample(samples, sample_rate, target_sr) - sample_rate = target_sr - if trim: - samples, _ = librosa.effects.trim(samples, trim_db) - self._samples = samples - self._sample_rate = sample_rate - if self._samples.ndim >= 2: - self._samples = np.mean(self._samples, 1) - - def __eq__(self, other): - """Return whether two objects are equal.""" - if type(other) is not type(self): - return False - if self._sample_rate != other._sample_rate: - return False - if self._samples.shape != other._samples.shape: - return False - if np.any(self.samples != other._samples): - return False - return True - - def __ne__(self, other): - """Return whether two objects are unequal.""" - return not self.__eq__(other) - - def __str__(self): - """Return human-readable representation of segment.""" - return ("%s: num_samples=%d, sample_rate=%d, duration=%.2fsec, " - "rms=%.2fdB" % (type(self), self.num_samples, self.sample_rate, - self.duration, self.rms_db)) - - @staticmethod - def _convert_samples_to_float32(samples): - """Convert sample type to float32. - Audio sample type is usually integer or float-point. - Integers will be scaled to [-1, 1] in float32. - """ - float32_samples = samples.astype('float32') - if samples.dtype in np.sctypes['int']: - bits = np.iinfo(samples.dtype).bits - float32_samples *= (1. / 2 ** (bits - 1)) - elif samples.dtype in np.sctypes['float']: - pass - else: - raise TypeError("Unsupported sample type: %s." % samples.dtype) - return float32_samples - - @classmethod - def from_file(cls, filename, target_sr=None, int_values=False, offset=0, - duration=0, trim=False): - """ - Load a file supported by librosa and return as an AudioSegment. - :param filename: path of file to load - :param target_sr: the desired sample rate - :param int_values: if true, load samples as 32-bit integers - :param offset: offset in seconds when loading audio - :param duration: duration in seconds when loading audio - :return: numpy array of samples - """ - with sf.SoundFile(filename, 'r') as f: - dtype = 'int32' if int_values else 'float32' - sample_rate = f.samplerate - if offset > 0: - f.seek(int(offset * sample_rate)) - if duration > 0: - samples = f.read(int(duration * sample_rate), dtype=dtype) - else: - samples = f.read(dtype=dtype) - samples = samples.transpose() - return cls(samples, sample_rate, target_sr=target_sr, trim=trim) - - @property - def samples(self): - return self._samples.copy() - - @property - def sample_rate(self): - return self._sample_rate - - @property - def num_samples(self): - return self._samples.shape[0] - - @property - def duration(self): - return self._samples.shape[0] / float(self._sample_rate) - - @property - def rms_db(self): - mean_square = np.mean(self._samples ** 2) - return 10 * np.log10(mean_square) - - def gain_db(self, gain): - self._samples *= 10. ** (gain / 20.) - - def pad(self, pad_size, symmetric=False): - """Add zero padding to the sample. The pad size is given in number of samples. - If symmetric=True, `pad_size` will be added to both sides. If false, `pad_size` - zeros will be added only to the end. - """ - self._samples = np.pad(self._samples, - (pad_size if symmetric else 0, pad_size), - mode='constant') - - def subsegment(self, start_time=None, end_time=None): - """Cut the AudioSegment between given boundaries. - Note that this is an in-place transformation. - :param start_time: Beginning of subsegment in seconds. 
- :type start_time: float - :param end_time: End of subsegment in seconds. - :type end_time: float - :raise ValueError: If start_time or end_time is incorrectly set, e.g. out - of bounds in time. - """ - start_time = 0.0 if start_time is None else start_time - end_time = self.duration if end_time is None else end_time - if start_time < 0.0: - start_time = self.duration + start_time - if end_time < 0.0: - end_time = self.duration + end_time - if start_time < 0.0: - raise ValueError("The slice start position (%f s) is out of " - "bounds." % start_time) - if end_time < 0.0: - raise ValueError("The slice end position (%f s) is out of bounds." % - end_time) - if start_time > end_time: - raise ValueError("The slice start position (%f s) is later than " - "the end position (%f s)." % (start_time, end_time)) - if end_time > self.duration: - raise ValueError("The slice end position (%f s) is out of bounds " - "(> %f s)" % (end_time, self.duration)) - start_sample = int(round(start_time * self._sample_rate)) - end_sample = int(round(end_time * self._sample_rate)) - self._samples = self._samples[start_sample:end_sample] diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/parts/text/LICENSE b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/parts/text/LICENSE deleted file mode 100644 index 4ad4ed1d5e3..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/parts/text/LICENSE +++ /dev/null @@ -1,19 +0,0 @@ -Copyright (c) 2017 Keith Ito - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/parts/text/__init__.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/parts/text/__init__.py deleted file mode 100644 index 61936879a95..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/parts/text/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -# Copyright (c) 2017 Keith Ito -""" from https://github.com/keithito/tacotron """ -from . 
import cleaners - - -def _clean_text(text, cleaner_names, *args): - for name in cleaner_names: - cleaner = getattr(cleaners, name) - if not cleaner: - raise Exception('Unknown cleaner: %s' % name) - text = cleaner(text, *args) - return text diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/parts/text/cleaners.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/parts/text/cleaners.py deleted file mode 100644 index e1e52af5f37..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/parts/text/cleaners.py +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright (c) 2017 Keith Ito -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" from https://github.com/keithito/tacotron -Modified to add puncturation removal -""" - -''' -Cleaners are transformations that run over the input text at both training and eval time. - -Cleaners can be selected by passing a comma-delimited list of cleaner names as the "cleaners" -hyperparameter. Some cleaners are English-specific. You'll typically want to use: - 1. "english_cleaners" for English text - 2. "transliteration_cleaners" for non-English text that can be transliterated to ASCII using - the Unidecode library (https://pypi.python.org/pypi/Unidecode) - 3. "basic_cleaners" if you do not want to transliterate (in this case, you should also update - the symbols in symbols.py to match your data). - -''' - - -# Regular expression matching whitespace: -import re -from text_unidecode import unidecode -from .numbers import normalize_numbers -_whitespace_re = re.compile(r'\s+') - -# List of (regular expression, replacement) pairs for abbreviations: -_abbreviations = [(re.compile('\\b%s\\.' 
% x[0], re.IGNORECASE), x[1]) for x in [ - ('mrs', 'misess'), - ('mr', 'mister'), - ('dr', 'doctor'), - ('st', 'saint'), - ('co', 'company'), - ('jr', 'junior'), - ('maj', 'major'), - ('gen', 'general'), - ('drs', 'doctors'), - ('rev', 'reverend'), - ('lt', 'lieutenant'), - ('hon', 'honorable'), - ('sgt', 'sergeant'), - ('capt', 'captain'), - ('esq', 'esquire'), - ('ltd', 'limited'), - ('col', 'colonel'), - ('ft', 'fort'), -]] - - -def expand_abbreviations(text): - for regex, replacement in _abbreviations: - text = re.sub(regex, replacement, text) - return text - - -def expand_numbers(text): - return normalize_numbers(text) - - -def lowercase(text): - return text.lower() - - -def collapse_whitespace(text): - return re.sub(_whitespace_re, ' ', text) - - -def convert_to_ascii(text): - return unidecode(text) - - -def remove_punctuation(text, table): - text = text.translate(table) - text = re.sub(r'&', " and ", text) - text = re.sub(r'\+', " plus ", text) - return text - - -def basic_cleaners(text): - '''Basic pipeline that lowercases and collapses whitespace without transliteration.''' - text = lowercase(text) - text = collapse_whitespace(text) - return text - - -def transliteration_cleaners(text): - '''Pipeline for non-English text that transliterates to ASCII.''' - text = convert_to_ascii(text) - text = lowercase(text) - text = collapse_whitespace(text) - return text - - -def english_cleaners(text, table=None): - '''Pipeline for English text, including number and abbreviation expansion.''' - text = convert_to_ascii(text) - text = lowercase(text) - text = expand_numbers(text) - text = expand_abbreviations(text) - if table is not None: - text = remove_punctuation(text, table) - text = collapse_whitespace(text) - return text diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/parts/text/numbers.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/parts/text/numbers.py deleted file mode 100644 index 3d2f77121c8..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/parts/text/numbers.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright (c) 2017 Keith Ito -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-""" from https://github.com/keithito/tacotron -Modifed to add support for time and slight tweaks to _expand_number -""" - -import inflect -import re - - -_inflect = inflect.engine() -_comma_number_re = re.compile(r'([0-9][0-9\,]+[0-9])') -_decimal_number_re = re.compile(r'([0-9]+\.[0-9]+)') -_pounds_re = re.compile(r'£([0-9\,]*[0-9]+)') -_dollars_re = re.compile(r'\$([0-9\.\,]*[0-9]+)') -_ordinal_re = re.compile(r'[0-9]+(st|nd|rd|th)') -_number_re = re.compile(r'[0-9]+') -_time_re = re.compile(r'([0-9]{1,2}):([0-9]{2})') - - -def _remove_commas(m): - return m.group(1).replace(',', '') - - -def _expand_decimal_point(m): - return m.group(1).replace('.', ' point ') - - -def _expand_dollars(m): - match = m.group(1) - parts = match.split('.') - if len(parts) > 2: - return match + ' dollars' # Unexpected format - dollars = int(parts[0]) if parts[0] else 0 - cents = int(parts[1]) if len(parts) > 1 and parts[1] else 0 - if dollars and cents: - dollar_unit = 'dollar' if dollars == 1 else 'dollars' - cent_unit = 'cent' if cents == 1 else 'cents' - return '%s %s, %s %s' % (dollars, dollar_unit, cents, cent_unit) - elif dollars: - dollar_unit = 'dollar' if dollars == 1 else 'dollars' - return '%s %s' % (dollars, dollar_unit) - elif cents: - cent_unit = 'cent' if cents == 1 else 'cents' - return '%s %s' % (cents, cent_unit) - else: - return 'zero dollars' - - -def _expand_ordinal(m): - return _inflect.number_to_words(m.group(0)) - - -def _expand_number(m): - if int(m.group(0)[0]) == 0: - return _inflect.number_to_words(m.group(0), andword='', group=1) - num = int(m.group(0)) - if num > 1000 and num < 3000: - if num == 2000: - return 'two thousand' - elif num > 2000 and num < 2010: - return 'two thousand ' + _inflect.number_to_words(num % 100) - elif num % 100 == 0: - return _inflect.number_to_words(num // 100) + ' hundred' - else: - return _inflect.number_to_words(num, andword='', zero='oh', group=2).replace(', ', ' ') - # Add check for number phones and other large numbers - elif num > 1000000000 and num % 10000 != 0: - return _inflect.number_to_words(num, andword='', group=1) - else: - return _inflect.number_to_words(num, andword='') - - -def _expand_time(m): - mins = int(m.group(2)) - if mins == 0: - return _inflect.number_to_words(m.group(1)) - return " ".join([_inflect.number_to_words(m.group(1)), _inflect.number_to_words(m.group(2))]) - - -def normalize_numbers(text): - text = re.sub(_comma_number_re, _remove_commas, text) - text = re.sub(_pounds_re, r'\1 pounds', text) - text = re.sub(_dollars_re, _expand_dollars, text) - text = re.sub(_decimal_number_re, _expand_decimal_point, text) - text = re.sub(_ordinal_re, _expand_ordinal, text) - text = re.sub(_number_re, _expand_number, text) - text = re.sub(_time_re, _expand_time, text) - return text diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/preprocessing.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/preprocessing.py deleted file mode 100644 index 581885466b0..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/preprocessing.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import Tuple - -import torch -import torch.nn as nn - -from helpers import Optimization -from parts.features import FeatureFactory - - -class AudioPreprocessing(nn.Module): - """GPU accelerated audio preprocessing - """ - - def __init__(self, **kwargs): - nn.Module.__init__(self) # For PyTorch API - self.optim_level = kwargs.get( - 'optimization_level', Optimization.nothing) - self.featurizer = FeatureFactory.from_config(kwargs) - - def forward(self, x: Tuple[torch.Tensor, torch.Tensor]) -> Tuple[torch.Tensor, torch.Tensor]: - input_signal, length = x - length.requires_grad_(False) - processed_signal = self.featurizer(x) - processed_length = self.featurizer.get_seq_len(length) - return processed_signal, processed_length diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/rnn.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/rnn.py deleted file mode 100644 index 9bbea9c0a67..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/rnn.py +++ /dev/null @@ -1,107 +0,0 @@ -# Copyright (c) 2019, Myrtle Software Limited. All rights reserved. -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch - -from typing import Optional, Tuple - - -def rnn(rnn, input_size, hidden_size, num_layers, norm=None, - forget_gate_bias=1.0, dropout=0.0, **kwargs): - """TODO""" - if rnn != "lstm": - raise ValueError(f"Unknown rnn={rnn}") - if norm not in [None]: - raise ValueError(f"unknown norm={norm}") - - if rnn == "lstm": - return LstmDrop( - input_size=input_size, - hidden_size=hidden_size, - num_layers=num_layers, - dropout=dropout, - forget_gate_bias=forget_gate_bias, - **kwargs - ) - - -class LstmDrop(torch.nn.Module): - - def __init__(self, input_size, hidden_size, num_layers, dropout, forget_gate_bias, - **kwargs): - """Returns an LSTM with forget gate bias init to `forget_gate_bias`. - - Args: - input_size: See `torch.nn.LSTM`. - hidden_size: See `torch.nn.LSTM`. - num_layers: See `torch.nn.LSTM`. - dropout: See `torch.nn.LSTM`. - forget_gate_bias: For each layer and each direction, the total value of - to initialise the forget gate bias to. - - Returns: - A `torch.nn.LSTM`. 
- """ - super(LstmDrop, self).__init__() - - self.lstm = torch.nn.LSTM( - input_size=input_size, - hidden_size=hidden_size, - num_layers=num_layers, - dropout=dropout, - ) - if forget_gate_bias is not None: - for name, v in self.lstm.named_parameters(): - if "bias_ih" in name: - bias = getattr(self.lstm, name) - bias.data[hidden_size:2 * hidden_size].fill_(forget_gate_bias) - if "bias_hh" in name: - bias = getattr(self.lstm, name) - bias.data[hidden_size:2 * hidden_size].fill_(0) - - if dropout: - self.inplace_dropout = torch.nn.Dropout(dropout, inplace=True) - else: - self.inplace_droput = None - - def forward(self, x: torch.Tensor, - h: Optional[Tuple[torch.Tensor, torch.Tensor]] = None): - x, h = self.lstm(x, h) - - if self.inplace_dropout is not None: - self.inplace_dropout(x.data) - - return x, h - - -class StackTime(torch.nn.Module): - - __constants__ = ["factor"] - - def __init__(self, factor): - super().__init__() - self.factor = int(factor) - - def forward(self, x, x_lens): - # T, B, U - seq = [x] - for i in range(1, self.factor): - # This doesn't seem to make much sense... - tmp = torch.zeros_like(x) - tmp[:-i, :, :] = x[i:, :, :] - seq.append(tmp) - x_lens = torch.ceil(x_lens.float() / self.factor).int() - # Gross, this is horrible. What a waste of memory... - return torch.cat(seq, dim=2)[::self.factor, :, :], x_lens diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/docker/build.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/docker/build.sh deleted file mode 100644 index cfdc97c010e..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/docker/build.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash - -docker build . --rm -t jasper \ No newline at end of file diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/docker/launch.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/docker/launch.sh deleted file mode 100644 index 5c9c6a3f346..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/docker/launch.sh +++ /dev/null @@ -1,32 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -#!/bin/bash - -DATA_DIR=$1 -CHECKPOINT_DIR=$2 -RESULT_DIR=$3 - -docker run -it --rm \ - --gpus='"device=1"' \ - --shm-size=4g \ - --ulimit memlock=-1 \ - --ulimit stack=67108864 \ - -v "$DATA_DIR":/datasets \ - -v "$CHECKPOINT_DIR":/checkpoints/ \ - -v "$RESULT_DIR":/results/ \ - -v $PWD:/code \ - -v $PWD:/workspace/jasper \ - mlperf-rnnt-ref bash diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/download_librispeech.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/download_librispeech.sh deleted file mode 100644 index ee322fe3043..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/download_librispeech.sh +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -#!/usr/bin/env bash - -DATA_SET="LibriSpeech" -DATA_ROOT_DIR="/datasets" -DATA_DIR="${DATA_ROOT_DIR}/${DATA_SET}" -if [ ! -d "$DATA_DIR" ] -then - mkdir $DATA_DIR - chmod go+rx $DATA_DIR - python utils/download_librispeech.py utils/librispeech.csv $DATA_DIR -e ${DATA_ROOT_DIR}/ -else - echo "Directory $DATA_DIR already exists." -fi diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/evaluation.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/evaluation.sh deleted file mode 100644 index fcd472fd9aa..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/evaluation.sh +++ /dev/null @@ -1,92 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -#!/bin/bash -echo "Container nvidia build = " $NVIDIA_BUILD_ID - -DATA_DIR=${1:-"/datasets/LibriSpeech"} -DATASET=${2:-"dev-clean"} -MODEL_CONFIG=${3:-"configs/jasper10x5dr_sp_offline_specaugment.toml"} -RESULT_DIR=${4:-"/results"} -CHECKPOINT=$5 -CREATE_LOGFILE=${6:-"true"} -CUDNN_BENCHMARK=${7:-"false"} -NUM_GPUS=${8:-1} -PRECISION=${9:-"fp32"} -NUM_STEPS=${10:-"-1"} -SEED=${11:-0} -BATCH_SIZE=${12:-64} - - -if [ "$CREATE_LOGFILE" = "true" ] ; then - export GBS=$(expr $BATCH_SIZE \* $NUM_GPUS) - printf -v TAG "jasper_evaluation_${DATASET}_%s_gbs%d" "$PRECISION" $GBS - DATESTAMP=`date +'%y%m%d%H%M%S'` - LOGFILE="${RESULT_DIR}/${TAG}.${DATESTAMP}.log" - printf "Logs written to %s\n" "$LOGFILE" -fi - - - -PREC="" -if [ "$PRECISION" = "fp16" ] ; then - PREC="--fp16" -elif [ "$PRECISION" = "fp32" ] ; then - PREC="" -else - echo "Unknown argument" - exit -2 -fi - -STEPS="" -if [ "$NUM_STEPS" -gt 0 ] ; then - STEPS=" --steps $NUM_STEPS" -fi - -if [ "$CUDNN_BENCHMARK" = "true" ] ; then - CUDNN_BENCHMARK=" --cudnn_benchmark" -else - CUDNN_BENCHMARK="" -fi - - -CMD=" inference.py " -CMD+=" --batch_size $BATCH_SIZE " -CMD+=" --dataset_dir $DATA_DIR " -CMD+=" --val_manifest $DATA_DIR/librispeech-${DATASET}-wav.json " -CMD+=" --model_toml $MODEL_CONFIG " -CMD+=" --seed $SEED " -CMD+=" --ckpt $CHECKPOINT " -CMD+=" $CUDNN_BENCHMARK" -CMD+=" $PREC " -CMD+=" $STEPS " - - -if [ "$NUM_GPUS" -gt 1 ] ; then - CMD="python3 -m torch.distributed.launch --nproc_per_node=$NUM_GPUS $CMD" -else - CMD="python3 $CMD" -fi - - -set -x -if [ -z "$LOGFILE" ] ; then - $CMD -else - ( - $CMD - ) |& tee "$LOGFILE" -fi -set +x diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/inference.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/inference.sh deleted file mode 100644 index 2d4474ce2b7..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/inference.sh +++ /dev/null @@ -1,104 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -#!/bin/bash -echo "Container nvidia build = " $NVIDIA_BUILD_ID - - -DATA_DIR=${1-"/datasets/LibriSpeech"} -DATASET=${2:-"dev-clean"} -MODEL_CONFIG=${3:-"configs/jasper10x5dr_sp_offline_specaugment.toml"} -RESULT_DIR=${4:-"/results"} -CHECKPOINT=$5 -CREATE_LOGFILE=${6:-"true"} -CUDNN_BENCHMARK=${7:-"false"} -PRECISION=${8:-"fp32"} -NUM_STEPS=${9:-"-1"} -SEED=${10:-0} -BATCH_SIZE=${11:-64} -MODELOUTPUT_FILE=${12:-"none"} -PREDICTION_FILE=${13:-"$RESULT_DIR/${DATASET}.predictions"} - -if [ "$CREATE_LOGFILE" = "true" ] ; then - export GBS=$(expr $BATCH_SIZE) - printf -v TAG "jasper_inference_${DATASET}_%s_gbs%d" "$PRECISION" $GBS - DATESTAMP=`date +'%y%m%d%H%M%S'` - LOGFILE="${RESULT_DIR}/${TAG}.${DATESTAMP}.log" - printf "Logs written to %s\n" "$LOGFILE" -fi - - - -PREC="" -if [ "$PRECISION" = "fp16" ] ; then - PREC="--fp16" -elif [ "$PRECISION" = "fp32" ] ; then - PREC="" -else - echo "Unknown argument" - exit -2 -fi - -PRED="" -if [ "$PREDICTION_FILE" = "none" ] ; then - PRED="" -else - PRED=" --save_prediction $PREDICTION_FILE" -fi - -OUTPUT="" -if [ "$MODELOUTPUT_FILE" = "none" ] ; then - OUTPUT=" " -else - OUTPUT=" --logits_save_to $MODELOUTPUT_FILE" -fi - - -if [ "$CUDNN_BENCHMARK" = "true" ]; then - CUDNN_BENCHMARK=" --cudnn_benchmark" -else - CUDNN_BENCHMARK="" -fi - -STEPS="" -if [ "$NUM_STEPS" -gt 0 ] ; then - STEPS=" --steps $NUM_STEPS" -fi - -CMD=" python inference.py " -CMD+=" --batch_size $BATCH_SIZE " -CMD+=" --dataset_dir $DATA_DIR " -CMD+=" --val_manifest $DATA_DIR/librispeech-${DATASET}-wav.json " -CMD+=" --model_toml $MODEL_CONFIG " -CMD+=" --seed $SEED " -CMD+=" --ckpt $CHECKPOINT " -CMD+=" $CUDNN_BENCHMARK" -CMD+=" $PRED " -CMD+=" $OUTPUT " -CMD+=" $PREC " -CMD+=" $STEPS " - - -set -x -if [ -z "$LOGFILE" ] ; then - $CMD -else - ( - $CMD - ) |& tee "$LOGFILE" -fi -set +x -echo "MODELOUTPUT_FILE: ${MODELOUTPUT_FILE}" -echo "PREDICTION_FILE: ${PREDICTION_FILE}" diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/inference_benchmark.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/inference_benchmark.sh deleted file mode 100644 index 7aeea84c159..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/inference_benchmark.sh +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -#!/bin/bash - -echo "Container nvidia build = " $NVIDIA_BUILD_ID - - -DATA_DIR=${1:-"/datasets/LibriSpeech"} -DATASET=${2:-"dev-clean"} -MODEL_CONFIG=${3:-"configs/jasper10x5dr_sp_offline_specaugment.toml"} -RESULT_DIR=${4:-"/results"} -CHECKPOINT=$5 -CREATE_LOGFILE=${6:-"true"} -CUDNN_BENCHMARK=${7:-"true"} -PRECISION=${8:-"fp32"} -NUM_STEPS=${9:-"-1"} -MAX_DURATION=${10:-"36"} -SEED=${11:-0} -BATCH_SIZE=${12:-64} - -PREC="" -if [ "$PRECISION" = "fp16" ] ; then - PREC="--fp16" -elif [ "$PRECISION" = "fp32" ] ; then - PREC="" -else - echo "Unknown argument" - exit -2 -fi -STEPS="" -if [ "$NUM_STEPS" -gt 0 ] ; then - STEPS=" --steps $NUM_STEPS" -fi -if [ "$CUDNN_BENCHMARK" = "true" ] ; then - CUDNN_BENCHMARK=" --cudnn_benchmark" -else - CUDNN_BENCHMARK="" -fi - -CMD=" python inference_benchmark.py" -CMD+=" --batch_size=$BATCH_SIZE" -CMD+=" --model_toml=$MODEL_CONFIG" -CMD+=" --seed=$SEED" -CMD+=" --dataset_dir=$DATA_DIR" -CMD+=" --val_manifest $DATA_DIR/librispeech-${DATASET}-wav.json " -CMD+=" --ckpt=$CHECKPOINT" -CMD+=" --max_duration=$MAX_DURATION" -CMD+=" --pad_to=-1" -CMD+=" $CUDNN_BENCHMARK" -CMD+=" $PREC" -CMD+=" $STEPS" - - -if [ "$CREATE_LOGFILE" = "true" ] ; then - export GBS=$(expr $BATCH_SIZE ) - printf -v TAG "jasper_inference_benchmark_%s_gbs%d" "$PRECISION" $GBS - DATESTAMP=`date +'%y%m%d%H%M%S'` - LOGFILE="${RESULT_DIR}/${TAG}.${DATESTAMP}.log" - printf "Logs written to %s\n" "$LOGFILE" -fi - -set -x -if [ -z "$LOGFILE" ] ; then - $CMD -else - ( - $CMD - ) |& tee "$LOGFILE" - grep 'latency' "$LOGFILE" -fi -set +x diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/preprocess_librispeech.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/preprocess_librispeech.sh deleted file mode 100644 index 7cfe5cc6a57..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/preprocess_librispeech.sh +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -#!/usr/bin/env bash - -python ./utils/convert_librispeech.py \ - --input_dir /datasets/LibriSpeech/train-clean-100 \ - --dest_dir /datasets/LibriSpeech/train-clean-100-wav \ - --output_json /datasets/LibriSpeech/librispeech-train-clean-100-wav.json \ - --speed 0.9 1.1 -python ./utils/convert_librispeech.py \ - --input_dir /datasets/LibriSpeech/train-clean-360 \ - --dest_dir /datasets/LibriSpeech/train-clean-360-wav \ - --output_json /datasets/LibriSpeech/librispeech-train-clean-360-wav.json \ - --speed 0.9 1.1 -python ./utils/convert_librispeech.py \ - --input_dir /datasets/LibriSpeech/train-other-500 \ - --dest_dir /datasets/LibriSpeech/train-other-500-wav \ - --output_json /datasets/LibriSpeech/librispeech-train-other-500-wav.json \ - --speed 0.9 1.1 - - -python ./utils/convert_librispeech.py \ - --input_dir /datasets/LibriSpeech/dev-clean \ - --dest_dir /datasets/LibriSpeech/dev-clean-wav \ - --output_json /datasets/LibriSpeech/librispeech-dev-clean-wav.json -python ./utils/convert_librispeech.py \ - --input_dir /datasets/LibriSpeech/dev-other \ - --dest_dir /datasets/LibriSpeech/dev-other-wav \ - --output_json /datasets/LibriSpeech/librispeech-dev-other-wav.json - - -python ./utils/convert_librispeech.py \ - --input_dir /datasets/LibriSpeech/test-clean \ - --dest_dir /datasets/LibriSpeech/test-clean-wav \ - --output_json /datasets/LibriSpeech/librispeech-test-clean-wav.json -python ./utils/convert_librispeech.py \ - --input_dir /datasets/LibriSpeech/test-other \ - --dest_dir /datasets/LibriSpeech/test-other-wav \ - --output_json /datasets/LibriSpeech/librispeech-test-other-wav.json diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/train.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/train.sh deleted file mode 100644 index d59ce8ebeb2..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/train.sh +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# Copyright (c) 2019, Myrtle Software Limited. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
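Each `convert_librispeech.py` call above converts FLAC to WAV (at the source sample rate, 16 kHz for LibriSpeech), and for the training splits also writes 0.9x and 1.1x speed-perturbed copies via `--speed 0.9 1.1`. A minimal sketch of one such conversion using the `sox` Python package that the preprocessing utilities rely on; file paths are hypothetical and the output directory is assumed to exist:

```python
import sox  # the package used by the preprocessing utilities

# Hypothetical paths; LibriSpeech audio is 16 kHz FLAC.
input_flac = "LibriSpeech/train-clean-100/19/198/19-198-0000.flac"

for factor in (0.9, 1.0, 1.1):
    suffix = "" if factor == 1 else f"-{factor}"
    output_wav = f"train-clean-100-wav/19/198/19-198-0000{suffix}.wav"
    # Speed-perturb and resample in one sox pipeline, mirroring the
    # Transformer chain in preprocessing_utils.py further down.
    tfm = sox.Transformer().speed(factor=factor).convert(16000)
    tfm.build(input_flac, output_wav)
```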
- - -#!/bin/bash -echo "Container nvidia build = " $NVIDIA_BUILD_ID - -DATA_DIR=${1:-"/datasets/LibriSpeech"} -MODEL_CONFIG=${2:-"configs/rnnt.toml"} -RESULT_DIR=${3:-"/results"} -CHECKPOINT=${4:-"none"} -CREATE_LOGFILE=${5:-"true"} -CUDNN_BENCHMARK=${6:-"true"} -NUM_GPUS=${7:-8} -PRECISION=${8:-"fp16"} -EPOCHS=${9:-100} -SEED=${10:-6} -BATCH_SIZE=${11:-8} -EVAL_BATCH_SIZE=${11:-2} -LEARNING_RATE=${12:-"0.001"} -LEARNING_RATE_WARMUP=${12:-"8000"} -GRADIENT_ACCUMULATION_STEPS=${13:-1} -LAUNCH_OPT=${LAUNCH_OPT:-"none"} - - -PREC="" -if [ "$PRECISION" = "fp16" ] ; then - PREC="--fp16" -elif [ "$PRECISION" = "fp32" ] ; then - PREC="" -else - echo "Unknown argument" - exit -2 -fi - -CUDNN="" -if [ "$CUDNN_BENCHMARK" = "true" ] && [ "$PRECISION" = "fp16" ]; then - CUDNN=" --cudnn" -else - CUDNN="" -fi - - - -if [ "$CHECKPOINT" = "none" ] ; then - CHECKPOINT="" -else - CHECKPOINT=" --ckpt=${CHECKPOINT}" -fi - - -CMD=" train.py" -CMD+=" --batch_size=$BATCH_SIZE" -CMD+=" --eval_batch_size=$EVAL_BATCH_SIZE" -CMD+=" --num_epochs=$EPOCHS" -CMD+=" --output_dir=$RESULT_DIR" -CMD+=" --model_toml=$MODEL_CONFIG" -CMD+=" --lr=$LEARNING_RATE" -CMD+=" --lr_warmup=$LEARNING_RATE_WARMUP" -CMD+=" --seed=$SEED" -CMD+=" --optimizer=adam" -CMD+=" --dataset_dir=$DATA_DIR" -CMD+=" --val_manifest=$DATA_DIR/librispeech-dev-clean-wav.json" -CMD+=" --train_manifest=$DATA_DIR/librispeech-train-clean-100-wav.json,$DATA_DIR/librispeech-train-clean-360-wav.json,$DATA_DIR/librispeech-train-other-500-wav.json" -CMD+=" --weight_decay=1e-3" -CMD+=" --save_freq=100" -CMD+=" --eval_freq=1" -CMD+=" --train_freq=250" -CMD+=" --lr_decay" -CMD+=" --gradient_accumulation_steps=$GRADIENT_ACCUMULATION_STEPS " -CMD+=" $CHECKPOINT" -CMD+=" $PREC" -CMD+=" $CUDNN" - - -if [ "${LAUNCH_OPT}" != "none" ]; then - CMD="python -m $LAUNCH_OPT $CMD" -elif [ "$NUM_GPUS" -gt 1 ] ; then - CMD="python3 -m multiproc --nproc_per_node=$NUM_GPUS $CMD" -else - CMD="python3 $CMD" -fi - - -if [ "$CREATE_LOGFILE" = "true" ] ; then - export GBS=$(expr $BATCH_SIZE \* $NUM_GPUS) - printf -v TAG "rnnt_train_%s_gbs%d" "$PRECISION" $GBS - DATESTAMP=`date +'%y%m%d%H%M%S'` - LOGFILE=$RESULT_DIR/$TAG.$DATESTAMP.log - printf "Logs written to %s\n" "$LOGFILE" -fi - -set -x -if [ -z "$LOGFILE" ] ; then - $CMD -else - ( - $CMD - ) |& tee $LOGFILE -fi -set +x diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/train_benchmark.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/train_benchmark.sh deleted file mode 100644 index 7b5a33705ca..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/train_benchmark.sh +++ /dev/null @@ -1,130 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -#!/bin/bash - -echo "Container nvidia build = " $NVIDIA_BUILD_ID - -DATA_DIR=${1:-"/datasets/LibriSpeech"} -MODEL_CONFIG=${2:-"configs/jasper10x5dr_sp_offline_specaugment.toml"} -RESULT_DIR=${3:-"/results"} -CREATE_LOGFILE=${4:-"true"} -CUDNN_BENCHMARK=${5:-"true"} -NUM_GPUS=${6:-8} -PRECISION=${7:-"fp16"} -NUM_STEPS=${8:-"-1"} -MAX_DURATION=${9:-16.7} -SEED=${10:-0} -BATCH_SIZE=${11:-64} -LEARNING_RATE=${12:-"0.015"} -GRADIENT_ACCUMULATION_STEPS=${13:-1} -PRINT_FREQUENCY=${14:-1} - - -PREC="" -if [ "$PRECISION" = "fp16" ] ; then - PREC=" --fp16" -elif [ "$PRECISION" = "fp32" ] ; then - PREC="" -else - echo "Unknown argument" - exit -2 -fi - -STEPS="" -if [ "$NUM_STEPS" -ne "-1" ] ; then - STEPS=" --num_steps=$NUM_STEPS" -elif [ "$NUM_STEPS" = "-1" ] ; then - STEPS="" -else - echo "Unknown argument" - exit -2 -fi - -CUDNN="" -if [ "$CUDNN_BENCHMARK" = "true" ] ; then - CUDNN=" --cudnn" -else - CUDNN="" -fi - - -CMD=" train.py" -CMD+=" --batch_size=$BATCH_SIZE" -CMD+=" --num_epochs=400" -CMD+=" --output_dir=$RESULT_DIR" -CMD+=" --model_toml=$MODEL_CONFIG" -CMD+=" --lr=$LEARNING_RATE" -CMD+=" --seed=$SEED" -CMD+=" --optimizer=novograd" -CMD+=" --gradient_accumulation_steps=$GRADIENT_ACCUMULATION_STEPS" -CMD+=" --dataset_dir=$DATA_DIR" -CMD+=" --val_manifest=$DATA_DIR/librispeech-dev-clean-wav.json" -CMD+=" --train_manifest=$DATA_DIR/librispeech-train-clean-100-wav.json,$DATA_DIR/librispeech-train-clean-360-wav.json,$DATA_DIR/librispeech-train-other-500-wav.json" -CMD+=" --weight_decay=1e-3" -CMD+=" --save_freq=100000" -CMD+=" --eval_freq=100000" -CMD+=" --max_duration=$MAX_DURATION" -CMD+=" --pad_to_max" -CMD+=" --train_freq=$PRINT_FREQUENCY" -CMD+=" --lr_decay" -CMD+=" $CUDNN" -CMD+=" $PREC" -CMD+=" $STEPS" - -if [ "$NUM_GPUS" -gt 1 ] ; then - CMD="python3 -m torch.distributed.launch --nproc_per_node=$NUM_GPUS $CMD" -else - CMD="python3 $CMD" -fi - - -if [ "$CREATE_LOGFILE" = "true" ] ; then - export GBS=$(expr $BATCH_SIZE \* $NUM_GPUS) - printf -v TAG "jasper_train_benchmark_%s_gbs%d" "$PRECISION" $GBS - DATESTAMP=`date +'%y%m%d%H%M%S'` - LOGFILE="${RESULT_DIR}/${TAG}.${DATESTAMP}.log" - printf "Logs written to %s\n" "$LOGFILE" - -fi - -if [ -z "$LOGFILE" ] ; then - - set -x - $CMD - set +x -else - - set -x - ( - $CMD - ) |& tee "$LOGFILE" - - set +x - - mean_latency=`cat "$LOGFILE" | grep 'Step time' | awk '{print $3}' | tail -n +2 | egrep -o '[0-9.]+'| awk 'BEGIN {total=0} {total+=$1} END {printf("%.2f\n",total/NR)}'` - mean_throughput=`python -c "print($BATCH_SIZE*$NUM_GPUS/${mean_latency})"` - training_wer_per_pgu=`cat "$LOGFILE" | grep 'training_batch_WER'| awk '{print $2}' | tail -n 1 | egrep -o '[0-9.]+'` - training_loss_per_pgu=`cat "$LOGFILE" | grep 'Loss@Step'| awk '{print $4}' | tail -n 1 | egrep -o '[0-9.]+'` - final_eval_wer=`cat "$LOGFILE" | grep 'Evaluation WER'| tail -n 1 | egrep -o '[0-9.]+'` - final_eval_loss=`cat "$LOGFILE" | grep 'Evaluation Loss'| tail -n 1 | egrep -o '[0-9.]+'` - - echo "max duration: $MAX_DURATION s" | tee -a "$LOGFILE" - echo "mean_latency: $mean_latency s" | tee -a "$LOGFILE" - echo "mean_throughput: $mean_throughput sequences/s" | tee -a "$LOGFILE" - echo "training_wer_per_pgu: $training_wer_per_pgu" | tee -a "$LOGFILE" - echo "training_loss_per_pgu: $training_loss_per_pgu" | tee -a "$LOGFILE" - echo "final_eval_loss: $final_eval_loss" | tee -a "$LOGFILE" - echo "final_eval_wer: $final_eval_wer" | tee -a "$LOGFILE" -fi diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/utils/__init__.py 
b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/utils/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/utils/convert_librispeech.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/utils/convert_librispeech.py deleted file mode 100644 index 09ce9a3a4ed..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/utils/convert_librispeech.py +++ /dev/null @@ -1,90 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import argparse -import os -import glob -import multiprocessing -import json - -import pandas as pd - -from preprocessing_utils import parallel_preprocess - -parser = argparse.ArgumentParser(description='Preprocess LibriSpeech.') -parser.add_argument('--input_dir', type=str, required=True, - help='LibriSpeech collection input dir') -parser.add_argument('--dest_dir', type=str, required=True, - help='Output dir') -parser.add_argument('--dest_list', type=str, required=False, - help='a file contains list of files needs to be converted') -parser.add_argument('--output_json', type=str, default='./', - help='name of the output json file.') -parser.add_argument('-s', '--speed', type=float, nargs='*', - help='Speed perturbation ratio') -parser.add_argument('--target_sr', type=int, default=None, - help='Target sample rate. ' - 'defaults to the input sample rate') -parser.add_argument('--overwrite', action='store_true', - help='Overwrite file if exists') -parser.add_argument('--parallel', type=int, default=multiprocessing.cpu_count(), - help='Number of threads to use when processing audio files') -args = parser.parse_args() - -args.input_dir = args.input_dir.rstrip('/') -args.dest_dir = args.dest_dir.rstrip('/') - - -def build_input_arr(input_dir): - txt_files = glob.glob(os.path.join(input_dir, '**', '*.trans.txt'), - recursive=True) - input_data = [] - for txt_file in txt_files: - rel_path = os.path.relpath(txt_file, input_dir) - with open(txt_file) as fp: - for line in fp: - fname, _, transcript = line.partition(' ') - input_data.append(dict(input_relpath=os.path.dirname(rel_path), - input_fname=fname + '.flac', - transcript=transcript)) - return input_data - - -print("[%s] Scaning input dir..." % args.output_json) -dataset = build_input_arr(input_dir=args.input_dir) - -if args.dest_list != None: - dest_file = open (args.dest_list, "r") - dest_list = dest_file.readlines() -else: - dest_list = None -print("[%s] Converting audio files..." % args.output_json) -dataset = parallel_preprocess(dataset=dataset, - input_dir=args.input_dir, - dest_dir=args.dest_dir, - dest_list=dest_list, - target_sr=args.target_sr, - speed=args.speed, - overwrite=args.overwrite, - parallel=args.parallel) - -print("[%s] Generating json..." 
% args.output_json) -df = pd.DataFrame(dataset, dtype=object) - -# Save json with python. df.to_json() produces back slashed in file paths -dataset = df.to_dict(orient='records') -with open(args.output_json, 'w') as fp: - json.dump(dataset, fp, indent=2) diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/utils/download_librispeech.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/utils/download_librispeech.py deleted file mode 100644 index f7e5eda1309..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/utils/download_librispeech.py +++ /dev/null @@ -1,76 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import os -import argparse -import pandas as pd - -from download_utils import download_file, md5_checksum, extract - -parser = argparse.ArgumentParser( - description='Download, verify and extract dataset files') -parser.add_argument('csv', type=str, - help='CSV file with urls and checksums to download.') -parser.add_argument('dest', type=str, - help='Download destnation folder.') -parser.add_argument('-e', type=str, default=None, - help='Extraction destnation folder. 
Defaults to download folder if not provided') -parser.add_argument('--skip_download', action='store_true', - help='Skip downloading the files') -parser.add_argument('--skip_checksum', action='store_true', - help='Skip checksum') -parser.add_argument('--skip_extract', action='store_true', - help='Skip extracting files') -args = parser.parse_args() -args.e = args.e or args.dest - - -df = pd.read_csv(args.csv, delimiter=',') - - -if not args.skip_download: - for url in df.url: - fname = url.split('/')[-1] - print("Downloading %s:" % fname) - download_file(url=url, dest_folder=args.dest, fname=fname) -else: - print("Skipping file download") - - -if not args.skip_checksum: - for index, row in df.iterrows(): - url = row['url'] - md5 = row['md5'] - fname = url.split('/')[-1] - fpath = os.path.join(args.dest, fname) - print("Verifing %s: " % fname, end='') - ret = md5_checksum(fpath=fpath, target_hash=md5) - if not ret: - raise ValueError(f"Checksum for {fname} failed!") - else: - print(f"Checksum correct for {fname}") -else: - print("Skipping checksum") - - -if not args.skip_extract: - for url in df.url: - fname = url.split('/')[-1] - fpath = os.path.join(args.dest, fname) - print("Decompressing %s:" % fpath) - extract(fpath=fpath, dest_folder=args.e) -else: - print("Skipping file extraction") diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/utils/download_utils.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/utils/download_utils.py deleted file mode 100644 index bda4193fbb0..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/utils/download_utils.py +++ /dev/null @@ -1,69 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
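`download_librispeech.py` is driven entirely by a two-column CSV (`url,md5`): each archive is downloaded, its MD5 verified, and then extracted. A standalone sketch of the verification step, mirroring `md5_checksum()` from `download_utils.py` and assuming the archives listed in `inference_librispeech.csv` were already downloaded into a hypothetical `./LibriSpeech-dl` folder:

```python
import hashlib
import os

import pandas as pd

df = pd.read_csv("pytorch/utils/inference_librispeech.csv", delimiter=',')
dest = "./LibriSpeech-dl"  # hypothetical download folder

for _, row in df.iterrows():
    fname = row['url'].split('/')[-1]
    fpath = os.path.join(dest, fname)
    md5 = hashlib.md5()
    with open(fpath, "rb") as fp:
        # Hash in 1 MB chunks so large tarballs never sit fully in memory.
        for chunk in iter(lambda: fp.read(1024 * 1024), b""):
            md5.update(chunk)
    ok = md5.hexdigest() == row['md5']
    print(f"{fname}: {'OK' if ok else 'checksum mismatch'}")
```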
- - -import hashlib -import requests -import os -import tarfile -import tqdm - - -def download_file(url, dest_folder, fname, overwrite=False): - fpath = os.path.join(dest_folder, fname) - if os.path.isfile(fpath): - if overwrite: - print("Overwriting existing file") - else: - print("File exists, skipping download.") - return - - tmp_fpath = fpath + '.tmp' - - r = requests.get(url, stream=True) - file_size = int(r.headers['Content-Length']) - chunk_size = 1024 * 1024 # 1MB - total_chunks = int(file_size / chunk_size) - - with open(tmp_fpath, 'wb') as fp: - content_iterator = r.iter_content(chunk_size=chunk_size) - chunks = tqdm.tqdm(content_iterator, total=total_chunks, - unit='MB', desc=fpath, leave=True) - for chunk in chunks: - fp.write(chunk) - - os.rename(tmp_fpath, fpath) - - -def md5_checksum(fpath, target_hash): - file_hash = hashlib.md5() - with open(fpath, "rb") as fp: - for chunk in iter(lambda: fp.read(1024 * 1024), b""): - file_hash.update(chunk) - return file_hash.hexdigest() == target_hash - - -def extract(fpath, dest_folder): - if fpath.endswith('.tar.gz'): - mode = 'r:gz' - elif fpath.endswith('.tar'): - mode = 'r:' - else: - raise IOError('fpath has unknown extention: %s' % fpath) - - with tarfile.open(fpath, mode) as tar: - members = tar.getmembers() - for member in tqdm.tqdm(iterable=members, total=len(members), leave=True): - tar.extract(path=dest_folder, member=member) diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/utils/inference_librispeech.csv b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/utils/inference_librispeech.csv deleted file mode 100644 index 40dac4e0e61..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/utils/inference_librispeech.csv +++ /dev/null @@ -1,5 +0,0 @@ -url,md5 -http://www.openslr.org/resources/12/dev-clean.tar.gz,42e2234ba48799c1f50f24a7926300a1 -http://www.openslr.org/resources/12/dev-other.tar.gz,c8d0bcc9cca99d4f8b62fcc847357931 -http://www.openslr.org/resources/12/test-clean.tar.gz,32fa31d27d2e1cad72775fee3f4849a9 -http://www.openslr.org/resources/12/test-other.tar.gz,fb5a50374b501bb3bac4815ee91d3135 diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/utils/librispeech-inference.csv b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/utils/librispeech-inference.csv deleted file mode 100644 index b5e43b222e6..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/utils/librispeech-inference.csv +++ /dev/null @@ -1,2 +0,0 @@ -url,md5 -http://www.openslr.org/resources/12/dev-clean.tar.gz,42e2234ba48799c1f50f24a7926300a1 \ No newline at end of file diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/utils/librispeech.csv b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/utils/librispeech.csv deleted file mode 100644 index d48a9f8db72..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/utils/librispeech.csv +++ /dev/null @@ -1,8 +0,0 @@ -url,md5 -http://www.openslr.org/resources/12/dev-clean.tar.gz,42e2234ba48799c1f50f24a7926300a1 -http://www.openslr.org/resources/12/dev-other.tar.gz,c8d0bcc9cca99d4f8b62fcc847357931 -http://www.openslr.org/resources/12/test-clean.tar.gz,32fa31d27d2e1cad72775fee3f4849a9 -http://www.openslr.org/resources/12/test-other.tar.gz,fb5a50374b501bb3bac4815ee91d3135 
-http://www.openslr.org/resources/12/train-clean-100.tar.gz,2a93770f6d5c6c964bc36631d331a522 -http://www.openslr.org/resources/12/train-clean-360.tar.gz,c0e676e450a7ff2f54aeade5171606fa -http://www.openslr.org/resources/12/train-other-500.tar.gz,d1a0fd59409feb2c614ce4d30c387708 diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/utils/preprocessing_utils.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/utils/preprocessing_utils.py deleted file mode 100644 index e0ce22b9a13..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/utils/preprocessing_utils.py +++ /dev/null @@ -1,86 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import multiprocessing -import functools - -import sox - - -from tqdm import tqdm - - -def preprocess(data, input_dir, dest_dir, dest_list, target_sr=None, speed=None, - overwrite=True): - speed = speed or [] - speed.append(1) - speed = list(set(speed)) # Make uniqe - - input_fname = os.path.join(input_dir, - data['input_relpath'], - data['input_fname']) - input_sr = sox.file_info.sample_rate(input_fname) - target_sr = target_sr or input_sr - - os.makedirs(os.path.join(dest_dir, data['input_relpath']), exist_ok=True) - - output_dict = {} - output_dict['transcript'] = data['transcript'].lower().strip() - output_dict['files'] = [] - - fname = os.path.splitext(data['input_fname'])[0] - for s in speed: - output_fname = fname + \ - '{}.wav'.format('' if s == 1 else '-{}'.format(s)) - output_fpath = os.path.join(dest_dir, - data['input_relpath'], - output_fname) - output_rel_fpath = os.path.join('train-clean-100-wav', - data['input_relpath'], - output_fname+"\n") - - if dest_list != None and not output_rel_fpath in dest_list: - return None - if not os.path.exists(output_fpath) or overwrite: - cbn = sox.Transformer().speed(factor=s).convert(target_sr) - cbn.build(input_fname, output_fpath) - - file_info = sox.file_info.info(output_fpath) - file_info['fname'] = os.path.join(os.path.basename(dest_dir), - data['input_relpath'], - output_fname) - file_info['speed'] = s - output_dict['files'].append(file_info) - - if s == 1: - file_info = sox.file_info.info(output_fpath) - output_dict['original_duration'] = file_info['duration'] - output_dict['original_num_samples'] = file_info['num_samples'] - - return output_dict - - -def parallel_preprocess(dataset, input_dir, dest_dir, dest_list, target_sr, speed, overwrite, parallel): - with multiprocessing.Pool(parallel) as p: - func = functools.partial(preprocess, - input_dir=input_dir, dest_dir=dest_dir, dest_list=dest_list, - target_sr=target_sr, speed=speed, overwrite=overwrite) - dataset = list(tqdm(p.imap(func, dataset), total=len(dataset))) - result = [] - for data in dataset: - if data != None: - result.append(data) - return result diff --git 
a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch_SUT.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch_SUT.py deleted file mode 100644 index f054d5da6b7..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch_SUT.py +++ /dev/null @@ -1,159 +0,0 @@ -# copyright (c) 2020, Cerebras Systems, Inc. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import os -sys.path.insert(0, os.path.join(os.getcwd(), "pytorch")) - -import array -import numpy as np -import toml -import mlperf_loadgen as lg - -from QSL import AudioQSL, AudioQSLInMemory -from helpers import add_blank_label - -import torch - - -def load_and_migrate_checkpoint(ckpt_path): - checkpoint = torch.load(ckpt_path, map_location="cpu") - migrated_state_dict = {} - for key, value in checkpoint['state_dict'].items(): - key = key.replace("joint_net", "joint.net") - migrated_state_dict[key] = value - del migrated_state_dict["audio_preprocessor.featurizer.fb"] - del migrated_state_dict["audio_preprocessor.featurizer.window"] - return migrated_state_dict - - -class PytorchSUT: - def __init__(self, config_toml, checkpoint_path, dataset_dir, manifest_filepath, - perf_count, bf16=False, int8=False, configure_file=""): - self.bf16 = bf16 - self.int8 = int8 - self.configure_file = configure_file - config = toml.load(config_toml) - - dataset_vocab = config['labels']['labels'] - featurizer_config = config['input_eval'] - - self.sut = lg.ConstructSUT(self.issue_queries, self.flush_queries, - self.process_latencies) - self.qsl = AudioQSLInMemory(dataset_dir, - manifest_filepath, - dataset_vocab, - featurizer_config["sample_rate"], - perf_count) - - from decoders import ScriptGreedyDecoder - from model_separable_rnnt import RNNT - from preprocessing import AudioPreprocessing - rnnt_vocab = add_blank_label(dataset_vocab) - self.audio_preprocessor = AudioPreprocessing(**featurizer_config) - self.audio_preprocessor.eval() - self.audio_preprocessor = torch.jit.script(self.audio_preprocessor) - self.audio_preprocessor = torch.jit._recursive.wrap_cpp_module( - torch._C._freeze_module(self.audio_preprocessor._c)) - - model = RNNT( - feature_config=featurizer_config, - rnnt=config['rnnt'], - num_classes=len(rnnt_vocab) - ) - model.load_state_dict(load_and_migrate_checkpoint(checkpoint_path), - strict=True) - - import intel_pytorch_extension as ipex - if self.bf16: - ipex.enable_auto_mixed_precision(mixed_dtype=torch.bfloat16) - ipex.core.enable_auto_dnnl() - model = model.to(ipex.DEVICE) - - model.eval() - """ - if not self.ipex: - model.encoder = torch.jit.script(model.encoder) - model.encoder = torch.jit._recursive.wrap_cpp_module( - torch._C._freeze_module(model.encoder._c)) - model.prediction = torch.jit.script(model.prediction) - model.prediction = torch.jit._recursive.wrap_cpp_module( - torch._C._freeze_module(model.prediction._c)) - """ - model.joint = torch.jit.script(model.joint) - model.joint = 
torch.jit._recursive.wrap_cpp_module( - torch._C._freeze_module(model.joint._c)) - """ - if not self.ipex: - model = torch.jit.script(model) - """ - - self.greedy_decoder = ScriptGreedyDecoder(len(rnnt_vocab) - 1, model) - - def issue_queries(self, query_samples): - import intel_pytorch_extension as ipex - conf = None - if self.int8: - conf = ipex.AmpConf(torch.int8, self.configure_file) - for query_sample in query_samples: - waveform = self.qsl[query_sample.index] - assert waveform.ndim == 1 - waveform_length = np.array(waveform.shape[0], dtype=np.int64) - waveform = np.expand_dims(waveform, 0) - waveform_length = np.expand_dims(waveform_length, 0) - with torch.no_grad(): - waveform = torch.from_numpy(waveform) - waveform_length = torch.from_numpy(waveform_length) - feature, feature_length = self.audio_preprocessor.forward((waveform, waveform_length)) - assert feature.ndim == 3 - assert feature_length.ndim == 1 - # RNNT can run in the following precision combinations: - # encoder | decoder | --bf16 | --int8 - # --------------+-----------+-----------+--------- - # FP32 | FP32 | False | False - # BF16 | BF16 | True | False - # INT8 | BF16 | True | True - # INT8 | FP32 | False | True - if self.bf16 and not self.int8: - # set bf16 mode globally for both encoder and decoder - ipex.enable_auto_mixed_precision(mixed_dtype=torch.bfloat16) - ipex.core.enable_auto_dnnl() - feature = feature.to(ipex.DEVICE) - feature_length = feature_length.to(ipex.DEVICE) - feature = feature.permute(2, 0, 1) - - # _, _, transcript = self.greedy_decoder.forward(feature, feature_length) - _, _, transcript, dur_enc, dur_dec = self.greedy_decoder.forward_single_batch(feature, feature_length, conf, self.int8, self.bf16) - - assert len(transcript) == 1 - response_array = array.array('q', transcript[0]) - bi = response_array.buffer_info() - response = lg.QuerySampleResponse(query_sample.id, bi[0], - bi[1] * response_array.itemsize) - lg.QuerySamplesComplete([response]) - - def flush_queries(self): - pass - - def process_latencies(self, latencies_ns): - print("Average latency (ms) per query:") - print(np.mean(latencies_ns)/1000000.0) - print("Median latency (ms): ") - print(np.percentile(latencies_ns, 50)/1000000.0) - print("90 percentile latency (ms): ") - print(np.percentile(latencies_ns, 90)/1000000.0) - - def __del__(self): - lg.DestroySUT(self.sut) - print("Finished destroying SUT.") diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/run.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/run.py deleted file mode 100644 index f82c651aa21..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/run.py +++ /dev/null @@ -1,198 +0,0 @@ -# Copyright 2020 The MLPerf Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
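The comment table in `issue_queries` above is the contract between the two CLI flags and the precisions the encoder and decoder actually run in. A tiny helper (name illustrative, not part of the original SUT) that encodes the same mapping:

```python
def rnnt_precisions(bf16: bool, int8: bool):
    """Return (encoder, decoder) precisions for a (--bf16, --int8) flag
    combination, per the comment table above. Helper name is illustrative."""
    if int8:
        return ("INT8", "BF16" if bf16 else "FP32")
    return ("BF16", "BF16") if bf16 else ("FP32", "FP32")

assert rnnt_precisions(False, False) == ("FP32", "FP32")
assert rnnt_precisions(True, False) == ("BF16", "BF16")
assert rnnt_precisions(True, True) == ("INT8", "BF16")
assert rnnt_precisions(False, True) == ("INT8", "FP32")
```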
-# ============================================================================= - -import argparse -import mlperf_loadgen as lg -import subprocess - -import os -from pathlib import Path -import re -import toml -import torch - -import numpy as np -from numpy.core.numeric import full -from pytorch_SUT import PytorchSUT - -MLPERF_CONF = Path(os.path.dirname(os.path.realpath(__file__))) / "./mlperf.conf" -MLPERF_CONF = MLPERF_CONF.resolve() - - -def get_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--backend", choices=["pytorch"], default="pytorch", help="Backend") - parser.add_argument("--scenario", choices=["SingleStream", "Offline", "Server"], default="Offline", help="Scenario") - # parser.add_argument("--accuracy", action="store_true", help="enable accuracy pass") - parser.add_argument("--mlperf_conf", default=str(MLPERF_CONF), help="mlperf rules config") - parser.add_argument("--user_conf", default="user.conf", help="user config for user LoadGen settings such as target QPS") - parser.add_argument("--pytorch_config_toml", default="pytorch/configs/rnnt.toml") - parser.add_argument("--pytorch_checkpoint", required=True) - parser.add_argument("--dataset_dir", required=True) - parser.add_argument("--manifest", required=True) - parser.add_argument("--perf_count", type=int, default=None) - parser.add_argument("--profile", choices=["True", "Split", "False"], default="False") - parser.add_argument("--bf16", dest='bf16', action='store_true') - parser.add_argument("--int8", dest='int8', action='store_true') - parser.add_argument("--log_dir", required=True) - parser.add_argument("--configure_path", default="") - parser.add_argument('--tune', dest='tune', action='store_true', - help='tune best int8 model with Neural Compressor on calibration dataset') - parser.add_argument('--benchmark', dest='benchmark', action='store_true', - help='run benchmark') - parser.add_argument("--accuracy_only", dest='accuracy_only', action='store_true', - help='For accuracy measurement only.') - parser.add_argument("--tuned_checkpoint", default='./saved_results', type=str, metavar='PATH', - help='path to checkpoint tuned by Neural Compressor (default: ./)') - args = parser.parse_args() - return args - - -scenario_map = { - "SingleStream": lg.TestScenario.SingleStream, - "Offline": lg.TestScenario.Offline, - "Server": lg.TestScenario.Server, -} - - -def main(): - args = get_args() - print ("Checking args: int8={}, bf16={}".format(args.int8, args.bf16)) - print(args) - - settings = lg.TestSettings() - settings.scenario = scenario_map[args.scenario] - settings.FromConfig(args.mlperf_conf, "rnnt", args.scenario) - settings.FromConfig(args.user_conf, "rnnt", args.scenario) - - if args.accuracy_only: - settings.mode = lg.TestMode.AccuracyOnly - else: - settings.mode = lg.TestMode.PerformanceOnly - - log_path = args.log_dir - os.makedirs(log_path, exist_ok=True) - log_output_settings = lg.LogOutputSettings() - log_output_settings.outdir = log_path - log_output_settings.copy_summary_to_stdout = True - log_settings = lg.LogSettings() - log_settings.log_output = log_output_settings - - pattern = ['accuracy=\d+.\d+', 'samples_per_query : \d+', 'Mean latency.*', 'Samples per second\\s*: \d+.\d+'] - - def eval_func(model): - print("Running Loadgen test...") - fullpath = None - use_int8 = False - settings.mode = lg.TestMode.AccuracyOnly - for path, dirs, files in os.walk('nc_workspace'): - if 'ipex_config_tmp.json' in files: - fullpath = os.path.join(path, 'ipex_config_tmp.json') - use_int8 = True - break - sut = 
PytorchSUT(args.pytorch_config_toml, args.pytorch_checkpoint, - args.dataset_dir, args.manifest, args.perf_count, - args.bf16, use_int8, fullpath) - lg.StartTestWithLogSettings(sut.sut, sut.qsl.qsl, settings, log_settings) - cmd = f"python3 accuracy_eval.py --log_dir {log_path} \ - --dataset_dir {args.dataset_dir} --manifest {args.manifest}" - out = subprocess.check_output(cmd, shell=True) - out = out.decode() - regex_accu = re.compile(pattern[0]) - accu = float(regex_accu.findall(out)[0].split('=')[1]) - print('Accuracy: %.3f ' % (accu)) - return accu - - if args.tune: - import shutil - shutil.rmtree('nc_workspace', ignore_errors=True) - sut = PytorchSUT(args.pytorch_config_toml, args.pytorch_checkpoint, - args.dataset_dir, args.manifest, args.perf_count, - True, False, None) - model = sut.greedy_decoder._model.encoder - - class NC_dataloader(object): - def __init__(self, sut): - self.sut = sut - self.batch_size = 1 - - def __iter__(self): - for i in range(0, self.sut.qsl.count, self.batch_size): - waveform = self.sut.qsl[i] - assert waveform.ndim == 1 - waveform_length = np.array(waveform.shape[0], dtype=np.int64) - waveform = np.expand_dims(waveform, 0) - waveform_length = np.expand_dims(waveform_length, 0) - with torch.no_grad(): - waveform = torch.from_numpy(waveform) - waveform_length = torch.from_numpy(waveform_length) - feature, feature_length = self.sut.audio_preprocessor.forward((waveform, waveform_length)) - assert feature.ndim == 3 - assert feature_length.ndim == 1 - feature = feature.permute(2, 0, 1) - yield (feature, feature_length), None - - from neural_compressor.experimental import Quantization, common - calib_dataloader = NC_dataloader(sut) - quantizer = Quantization("./conf.yaml") - quantizer.model = common.Model(model) - quantizer.calib_dataloader = calib_dataloader - quantizer.eval_func = eval_func - q_model = quantizer.fit() - q_model.save(args.tuned_checkpoint) - return - - if args.backend == "pytorch": - config_file = None - if args.int8: - config_file = os.path.join(args.tuned_checkpoint, "best_configure.json") - assert os.path.exists(config_file), "there is no ipex config file, Please tune with Neural Compressor first!" 
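In the `--tune` path above, Neural Compressor needs three things from the harness: a model, a calibration dataloader that yields `(input, label)` batches (the label goes unused because an `eval_func` supplies accuracy), and that `eval_func` returning a single score to maximize. A stripped-down sketch of the same wiring on a toy module, assuming a suitable `conf.yaml` is present in the working directory:

```python
import torch
from neural_compressor.experimental import Quantization, common


class ToyEncoder(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.fc = torch.nn.Linear(80, 80)

    def forward(self, x):
        return self.fc(x)


class ToyCalibLoader:
    """Mirrors the NC_dataloader contract above: iterable of (input, label)."""
    def __init__(self):
        self.batch_size = 1

    def __iter__(self):
        for _ in range(8):
            yield torch.randn(1, 80), None


def eval_func(model):
    # Return a scalar score; the tuner keeps the best-scoring int8 config.
    with torch.no_grad():
        return float(model(torch.randn(1, 80)).abs().mean())


quantizer = Quantization("./conf.yaml")  # assumed to exist, as in run.py
quantizer.model = common.Model(ToyEncoder())
quantizer.calib_dataloader = ToyCalibLoader()
quantizer.eval_func = eval_func
q_model = quantizer.fit()
q_model.save("./saved_results")
```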
- sut = PytorchSUT(args.pytorch_config_toml, args.pytorch_checkpoint, - args.dataset_dir, args.manifest, args.perf_count, - args.bf16, args.int8, config_file) - else: - raise ValueError("Unknown backend: {:}".format(args.backend)) - - print("Running Loadgen test...") - lg.StartTestWithLogSettings(sut.sut, sut.qsl.qsl, settings, log_settings) - - if args.accuracy_only: - cmd = f"python3 accuracy_eval.py --log_dir {log_path} --dataset_dir {args.dataset_dir} --manifest {args.manifest}" - print(f"Running accuracy script: {cmd}") - out = subprocess.check_output(cmd, shell=True) - out = out.decode() - regex_accu = re.compile(pattern[0]) - accu = float(regex_accu.findall(out)[0].split('=')[1]) - print('Accuracy: %.3f ' % (accu)) - else: - file_path = os.path.join(log_path, 'mlperf_log_summary.txt') - f = open(file_path, 'r', encoding='UTF-8') - file_content = f.read() - f.close() - regex_batch = re.compile(pattern[1]) - regex_late = re.compile(pattern[2]) - regex_perf = re.compile(pattern[3], flags=re.IGNORECASE) - latency_per_sample = float(regex_late.findall(file_content)[0].split(': ')[1]) - samples_per_s = float(regex_perf.findall(file_content)[0].split(': ')[1]) - print('Batch size = %d' % 1) - print('Latency: %.3f ms' % (latency_per_sample / 10**6)) - print('Throughput: %.3f samples/sec' % (samples_per_s)) - - print("Done!") - - - -if __name__ == "__main__": - main() diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/run.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/run.sh deleted file mode 100644 index 1044e6d4662..00000000000 --- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/run.sh +++ /dev/null @@ -1,91 +0,0 @@ -#/bin/bash - -set -euo pipefail - -root_dir=`pwd` -work_dir=$root_dir/mlperf-rnnt-librispeech -local_data_dir=$work_dir/local_data -librispeech_download_dir=$local_data_dir/LibriSpeech -stage=3 - -mkdir -p $work_dir $local_data_dir $librispeech_download_dir - -install_dir=third_party/install -mkdir -p $install_dir -install_dir=$(readlink -f $install_dir) - -set +u -source "$($CONDA_EXE info --base)/etc/profile.d/conda.sh" -set -u - -# stage -1: install dependencies -if [[ $stage -le -1 ]]; then - conda env create --force --file environment.yml - - set +u - source "$(conda info --base)/etc/profile.d/conda.sh" - conda activate mlperf-rnnt - set -u - - # We need to convert .flac files to .wav files via sox. Not all sox installs have flac support, so we install from source. - wget https://ftp.osuosl.org/pub/xiph/releases/flac/flac-1.3.2.tar.xz -O third_party/flac-1.3.2.tar.xz - (cd third_party; tar xf flac-1.3.2.tar.xz; cd flac-1.3.2; ./configure --prefix=$install_dir && make && make install) - - #wget https://sourceforge.net/projects/sox/files/sox/14.4.2/sox-14.4.2.tar.gz -O third_party/sox-14.4.2.tar.gz - (cd third_party; tar zxf sox-14.4.2.tar.gz; cd sox-14.4.2; LDFLAGS="-L${install_dir}/lib" CFLAGS="-I${install_dir}/include" ./configure --prefix=$install_dir --with-flac && make && make install) - - (cd $(git rev-parse --show-toplevel)/loadgen; python setup.py install) -fi - -export PATH="$install_dir/bin/:$PATH" - -set +u -conda activate mlperf-rnnt -set -u - -# stage 0: download model. Check checksum to skip? -if [[ $stage -le 0 ]]; then - wget https://zenodo.org/record/3662521/files/DistributedDataParallel_1576581068.9962234-epoch-100.pt?download=1 -O $work_dir/rnnt.pt -fi - -# stage 1: download data. This will hae a non-zero exit code if the -# checksum is incorrect. 
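Both result paths in `run.py` scrape plain text with the `pattern` regexes defined near the top: the accuracy path pulls `accuracy=...` from the stdout of `accuracy_eval.py`, and the performance path pulls latency and throughput from `mlperf_log_summary.txt`. A small sketch of that scraping against hypothetical lines shaped to match the regexes (not verbatim LoadGen output):

```python
import re

# Same patterns as run.py, written as raw strings.
pattern = [r'accuracy=\d+.\d+', r'samples_per_query : \d+',
           r'Mean latency.*', r'Samples per second\s*: \d+.\d+']

# Hypothetical summary lines, formatted only to satisfy the regexes above.
summary = """\
Mean latency (ns)                : 52000000.0
Samples per second               : 10.42
"""

latency_ns = float(re.compile(pattern[2]).findall(summary)[0].split(': ')[1])
samples_per_s = float(
    re.compile(pattern[3], flags=re.IGNORECASE).findall(summary)[0].split(': ')[1])
print('Latency: %.3f ms' % (latency_ns / 10**6))    # 52.000 ms
print('Throughput: %.3f samples/sec' % samples_per_s)  # 10.420
```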
-if [[ $stage -le 1 ]]; then
-    python pytorch/utils/download_librispeech.py \
-        pytorch/utils/librispeech-inference.csv \
-        $librispeech_download_dir \
-        -e $local_data_dir --skip_download
-fi
-
-if [[ $stage -le 2 ]]; then
-    python pytorch/utils/convert_librispeech.py \
-        --input_dir $librispeech_download_dir/dev-clean \
-        --dest_dir $local_data_dir/dev-clean-wav \
-        --output_json $local_data_dir/dev-clean-wav.json
-fi
-
-if [[ $stage -le 3 ]]; then
-    for backend in pytorch; do
-        for accuracy in ""; do
-            for scenario in Offline; do
-                log_dir=${work_dir}/${scenario}_${backend}
-                if [ ! -z ${accuracy} ]; then
-                    log_dir+=_accuracy
-                fi
-                log_dir+=rerun
-
-                python run.py --backend pytorch \
-                    --dataset_dir $local_data_dir \
-                    --manifest $local_data_dir/dev-clean-wav.json \
-                    --pytorch_config_toml pytorch/configs/rnnt.toml \
-                    --pytorch_checkpoint $work_dir/rnnt.pt \
-                    --scenario ${scenario} \
-                    --backend ${backend} \
-                    --log_dir ${log_dir} \
-                    ${accuracy} &
-
-            done
-        done
-    done
-    wait
-fi
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/run_benchmark.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/run_benchmark.sh
deleted file mode 100644
index b0c755ba11e..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/run_benchmark.sh
+++ /dev/null
@@ -1,101 +0,0 @@
-#!/bin/bash
-set -x
-
-export TCMALLOC_DIR=$CONDA_PREFIX/lib
-export KMP_BLOCKTIME=1
-# tcmalloc:
-#export LD_PRELOAD=$TCMALLOC_DIR/libtcmalloc.so
-
-# jemalloc
-export LD_PRELOAD=$TCMALLOC_DIR/libjemalloc.so:$TCMALLOC_DIR/libiomp5.so
-#export MALLOC_CONF="oversize_threshold:1,background_thread:true,metadata_thp:auto,dirty_decay_ms:9000000000,muzzy_decay_ms:9000000000"
-export MALLOC_CONF="background_thread:true,dirty_decay_ms:8000,muzzy_decay_ms:8000"
-
-PYTHON_VERSION=`python -c 'import sys; print ("{}.{}".format(sys.version_info.major, sys.version_info.minor))'`
-SITE_PACKAGES=`python -c 'import site; print (site.getsitepackages()[0])'`
-IPEX_VERSION=`conda list |grep torch-ipex | awk '{print $2}' `
-export LD_LIBRARY_PATH=$SITE_PACKAGES/torch_ipex-${IPEX_VERSION}-py$PYTHON_VERSION-linux-x86_64.egg/lib/:$LD_LIBRARY_PATH
-
-sockets=`lscpu | grep Socket | awk '{print $2}'`
-cores=`lscpu | grep Core.*per\ socket: | awk '{print $4}'`
-export DNNL_PRIMITIVE_CACHE_CAPACITY=10485760
-
-function main {
-
-    init_params "$@"
-    run_benchmark
-
-}
-
-# init params
-function init_params {
-    tuned_checkpoint=saved_results
-    scenario=Offline
-    backend=pytorch
-    for var in "$@"
-    do
-        case $var in
-            --topology=*)
-                topology=$(echo $var |cut -f2 -d=)
-            ;;
-            --dataset_location=*)
-                dataset_location=$(echo $var |cut -f2 -d=)
-            ;;
-            --input_model=*)
-                input_model=$(echo $var |cut -f2 -d=)
-            ;;
-            --mode=*)
-                mode=$(echo $var |cut -f2 -d=)
-            ;;
-            --batch_size=*)
-                batch_size=$(echo $var |cut -f2 -d=)
-            ;;
-            --iters=*)
-                iters=$(echo ${var} |cut -f2 -d=)
-            ;;
-            --int8=*)
-                int8=$(echo ${var} |cut -f2 -d=)
-            ;;
-            --config=*)
-                tuned_checkpoint=$(echo $var |cut -f2 -d=)
-            ;;
-            *)
-                echo "Error: No such parameter: ${var}"
-                exit 1
-            ;;
-        esac
-    done
-
-}
-
-
-# run_benchmark
-function run_benchmark {
-    if [[ ${mode} == "accuracy" ]]; then
-        mode_cmd=" --accuracy_only"
-    elif [[ ${mode} == "benchmark" ]]; then
-        mode_cmd=" --benchmark --user_conf user_benchmark.sh"
-    else
-        echo "Error: No such mode: ${mode}"
-        exit 1
-    fi
-
-    extra_cmd=""
-    if [[ ${int8} == "true" ]]; then
-        extra_cmd=$extra_cmd" --int8 --bf16"
-    fi
-    echo $extra_cmd
-
-    python run.py --dataset_dir ${dataset_location} \
-        --manifest $dataset_location/dev-clean-wav.json \
-        --pytorch_config_toml pytorch/configs/rnnt.toml \
-        --pytorch_checkpoint $input_model \
-        --scenario ${scenario} \
-        --backend ${backend} \
-        --log_dir output \
-        --tuned_checkpoint $tuned_checkpoint \
-        $mode_cmd \
-        ${extra_cmd}
-}
-
-main "$@"
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/run_inference_cpu.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/run_inference_cpu.sh
deleted file mode 100644
index ca8fa2469ae..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/run_inference_cpu.sh
+++ /dev/null
@@ -1,69 +0,0 @@
-export TCMALLOC_DIR=$CONDA_PREFIX/lib
-export KMP_BLOCKTIME=1
-# tcmalloc:
-#export LD_PRELOAD=$TCMALLOC_DIR/libtcmalloc.so
-
-# jemalloc
-export LD_PRELOAD=$TCMALLOC_DIR/libjemalloc.so:$TCMALLOC_DIR/libiomp5.so
-#export MALLOC_CONF="oversize_threshold:1,background_thread:true,metadata_thp:auto,dirty_decay_ms:9000000000,muzzy_decay_ms:9000000000"
-export MALLOC_CONF="background_thread:true,dirty_decay_ms:8000,muzzy_decay_ms:8000"
-
-PYTHON_VERSION=`python -c 'import sys; print ("{}.{}".format(sys.version_info.major, sys.version_info.minor))'`
-SITE_PACKAGES=`python -c 'import site; print (site.getsitepackages()[0])'`
-IPEX_VERSION=`conda list |grep torch-ipex | awk '{print $2}' `
-export LD_LIBRARY_PATH=$SITE_PACKAGES/torch_ipex-${IPEX_VERSION}-py$PYTHON_VERSION-linux-x86_64.egg/lib/:$LD_LIBRARY_PATH
-
-sockets=`lscpu | grep Socket | awk '{print $2}'`
-cores=`lscpu | grep Core.*per\ socket: | awk '{print $4}'`
-
-root_dir=`pwd`
-work_dir=$root_dir/mlperf-rnnt-librispeech
-local_data_dir=$work_dir/local_data
-configure_path=calibration_result.json
-
-scenario=Offline
-machine_conf=offline.conf
-backend=pytorch
-for arg in $@; do
-    case ${arg} in
-        --accuracy) accuracy="--accuracy_only";;
-        --debug) debug="--debug";;
-        --profile*)
-            if [[ $(echo ${arg} | cut -f2 -d=) == "--profile" ]];then
-                profile="--profile True"
-            else
-                profile="--profile $(echo ${arg} | cut -f2 -d=)"
-            fi;;
-        --server)
-            scenario=Server
-            machine_conf=server.conf;;
-        --verbose*) verbose="--verbose $(echo ${arg} | cut -f2 -d=)";;
-        --warmup) warmup="--warmup";;
-        *) echo "Error: No such parameter: ${arg}" exit 1;;
-    esac
-done
-
-log_dir=${work_dir}/${scenario}_${backend}
-if [ ! -z ${accuracy} ]; then
-    log_dir+=_accuracy
-fi
-log_dir+=rerun
-
-export DNNL_PRIMITIVE_CACHE_CAPACITY=10485760
-
-python run.py --dataset_dir $local_data_dir \
-    --manifest $local_data_dir/dev-clean-wav.json \
-    --pytorch_config_toml pytorch/configs/rnnt.toml \
-    --pytorch_checkpoint $work_dir/rnnt.pt \
-    --scenario ${scenario} \
-    --backend ${backend} \
-    --log_dir output \
-    --configure_path $configure_path \
-    --machine_conf $machine_conf \
-    ${accuracy} \
-    ${warmup} \
-    ${debug} \
-    ${profile} \
-    ${verbose} \
-    --bf16 \
-    --int8
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/run_tuning.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/run_tuning.sh
deleted file mode 100644
index 0a03c6f2e3d..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/run_tuning.sh
+++ /dev/null
@@ -1,81 +0,0 @@
-export TCMALLOC_DIR=$CONDA_PREFIX/lib
-export KMP_BLOCKTIME=1
-# tcmalloc:
-#export LD_PRELOAD=$TCMALLOC_DIR/libtcmalloc.so
-
-# jemalloc
-export LD_PRELOAD=$TCMALLOC_DIR/libjemalloc.so:$TCMALLOC_DIR/libiomp5.so
-#export MALLOC_CONF="oversize_threshold:1,background_thread:true,metadata_thp:auto,dirty_decay_ms:9000000000,muzzy_decay_ms:9000000000"
-export MALLOC_CONF="background_thread:true,dirty_decay_ms:8000,muzzy_decay_ms:8000"
-
-PYTHON_VERSION=`python -c 'import sys; print ("{}.{}".format(sys.version_info.major, sys.version_info.minor))'`
-SITE_PACKAGES=`python -c 'import site; print (site.getsitepackages()[0])'`
-IPEX_VERSION=`conda list |grep torch-ipex | awk '{print $2}' `
-export LD_LIBRARY_PATH=$SITE_PACKAGES/torch_ipex-${IPEX_VERSION}-py$PYTHON_VERSION-linux-x86_64.egg/lib/:$LD_LIBRARY_PATH
-
-sockets=`lscpu | grep Socket | awk '{print $2}'`
-cores=`lscpu | grep Core.*per\ socket: | awk '{print $4}'`
-
-function main {
-
-    init_params "$@"
-    run_tuning
-
-}
-# init params
-function init_params {
-    tuned_checkpoint=saved_results
-    scenario=Offline
-    backend=pytorch
-    for var in "$@"
-    do
-        case $var in
-            --topology=*)
-                topology=$(echo $var |cut -f2 -d=)
-            ;;
-            --dataset_location=*)
-                dataset_location=$(echo $var |cut -f2 -d=)
-            ;;
-            --input_model=*)
-                input_model=$(echo $var |cut -f2 -d=)
-            ;;
-            --output_model=*)
-                tuned_checkpoint=$(echo $var |cut -f2 -d=)
-            ;;
-            --debug) debug="--debug";;
-            --profile*)
-                if [[ $(echo ${arg} | cut -f2 -d=) == "--profile" ]];then
-                    profile="--profile True"
-                else
-                    profile="--profile $(echo ${arg} | cut -f2 -d=)"
-                fi;;
-            --server)
-                scenario=Server
-            ;;
-            *)
-                echo "Error: No such parameter: ${var}"
-                exit 1
-            ;;
-        esac
-    done
-}
-
-export DNNL_PRIMITIVE_CACHE_CAPACITY=10485760
-
-# run_tuning
-function run_tuning {
-
-    python run.py --dataset_dir $dataset_location \
-        --manifest $dataset_location/dev-clean-wav.json \
-        --pytorch_config_toml pytorch/configs/rnnt.toml \
-        --pytorch_checkpoint $input_model \
-        --scenario ${scenario} \
-        --backend ${backend} \
-        --log_dir output \
-        --tune \
-        --tuned_checkpoint $tuned_checkpoint \
-        --int8 \
-        --bf16
-}
-
-main "$@"
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/user.conf b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/user.conf
deleted file mode 100644
index b1cc9d4fb24..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/user.conf
+++ /dev/null
@@ -1,3 +0,0 @@
-rnnt.Server.target_qps = 95
-#rnn.Server.min_query_count = 226170
-rnnt.Offline.target_qps = 10
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/user_benchmark.conf b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/user_benchmark.conf
deleted file mode 100644
index ae467b6f866..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/user_benchmark.conf
+++ /dev/null
@@ -1,5 +0,0 @@
-rnnt.Server.target_qps = 95
-#rnn.Server.min_query_count = 226170
-# rnnt.Offline.target_qps = 10
-*.Offline.performance_issue_same = 1
-*.Offline.performance_issue_unique = 1
diff --git a/examples/tensorflow/image_recognition/resnet_v2/quantization/qat/README.md b/examples/tensorflow/image_recognition/resnet_v2/quantization/qat/README.md
index db54f8c3978..8dc19e23fcd 100644
--- a/examples/tensorflow/image_recognition/resnet_v2/quantization/qat/README.md
+++ b/examples/tensorflow/image_recognition/resnet_v2/quantization/qat/README.md
@@ -15,7 +15,10 @@ pip install neural-compressor
 ```shell
 pip install intel-tensorflow==2.4.0
 ```
-
+### 3. Install tensorflow_model_optimization
+```shell
+pip install tensorflow_model_optimization
+```
 ## Run Command
 ```shell
 python resnet_v2.py # to get the quantized ResNet-V2 model which will be saved into './trained_qat_model'.
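For reference, the `tensorflow_model_optimization` package added in the README hunk above provides the Keras quantization-aware-training (QAT) API that such examples build on. The following is a minimal, illustrative sketch of that API on a toy model only; it is not the contents of `resnet_v2.py`, and the layer sizes and the commented `x_train`/`y_train` names are placeholders.

```python
import tensorflow as tf
import tensorflow_model_optimization as tfmot

# Tiny placeholder float model; resnet_v2.py builds and trains its own ResNet-V2 network.
model = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=(32, 32, 3)),
    tf.keras.layers.Conv2D(16, 3, activation="relu"),
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(10, activation="softmax"),
])

# Wrap the model with fake-quantization nodes so quantization effects are
# simulated during fine-tuning (quantization-aware training).
qat_model = tfmot.quantization.keras.quantize_model(model)

qat_model.compile(optimizer="adam",
                  loss="sparse_categorical_crossentropy",
                  metrics=["accuracy"])

# Fine-tune on your own data (x_train / y_train are placeholders), then save.
# qat_model.fit(x_train, y_train, epochs=1, batch_size=64)
qat_model.save("./trained_qat_model")
```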