diff --git a/examples/README.md b/examples/README.md
index 2cfe8d52ecc..7342bbe1e8a 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -461,7 +461,7 @@ Intel® Neural Compressor validated examples with multiple compression technique
RNN-T |
Speech Recognition |
Post-Training Dynamic / Static Quantization |
- eager / ipex |
+ eager |
Wav2Vec2 |
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/QSL.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/QSL.py
deleted file mode 100644
index 14ce2478100..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/QSL.py
+++ /dev/null
@@ -1,71 +0,0 @@
-import sys
-import os
-sys.path.insert(0, os.path.join(os.getcwd(), "pytorch"))
-
-from parts.manifest import Manifest
-from parts.segment import AudioSegment
-
-import numpy as np
-
-import mlperf_loadgen as lg
-
-
-class AudioQSL:
- def __init__(self, dataset_dir, manifest_filepath, labels,
- sample_rate=16000, perf_count=None):
- m_paths = [manifest_filepath]
- self.manifest = Manifest(dataset_dir, m_paths, labels, len(labels),
- normalize=True, max_duration=15.0)
- self.sample_rate = sample_rate
- self.count = len(self.manifest)
- perf_count = self.count if perf_count is None else perf_count
- self.sample_id_to_sample = {}
- self.qsl = lg.ConstructQSL(self.count, perf_count,
- self.load_query_samples,
- self.unload_query_samples)
- print(
- "Dataset loaded with {0:.2f} hours. Filtered {1:.2f} hours. Number of samples: {2}".format(
- self.manifest.duration / 3600,
- self.manifest.filtered_duration / 3600,
- self.count))
-
- def load_query_samples(self, sample_list):
- for sample_id in sample_list:
- self.sample_id_to_sample[sample_id] = self._load_sample(sample_id)
-
- def unload_query_samples(self, sample_list):
- for sample_id in sample_list:
- del self.sample_id_to_sample[sample_id]
-
- def idxs(self):
- return self.sample_id_to_sample.keys()
-
- def _load_sample(self, index):
- sample = self.manifest[index]
- segment = AudioSegment.from_file(sample['audio_filepath'][0],
- target_sr=self.sample_rate)
- waveform = segment.samples
- assert isinstance(waveform, np.ndarray) and waveform.dtype == np.float32
- return waveform
-
- def __getitem__(self, index):
- return self.sample_id_to_sample[index]
-
- def __del__(self):
- lg.DestroyQSL(self.qsl)
- print("Finished destroying QSL.")
-
-# We have no problem fitting all data in memory, so we do that, in
-# order to speed up execution of the benchmark.
-class AudioQSLInMemory(AudioQSL):
- def __init__(self, dataset_dir, manifest_filepath, labels,
- sample_rate=16000, perf_count=None):
- super().__init__(dataset_dir, manifest_filepath, labels,
- sample_rate, perf_count)
- super().load_query_samples(range(self.count))
-
- def load_query_samples(self, sample_list):
- pass
-
- def unload_query_samples(self, sample_list):
- pass
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/README.md b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/README.md
deleted file mode 100644
index fe734f8266d..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/README.md
+++ /dev/null
@@ -1,105 +0,0 @@
-# RNNT MLPerf Inference v1.1
-
-> Note: IPEX 1.10 and 1.11 are not supported
-
-## SW requirements
-###
-| SW |configuration |
-|--|--|
-| GCC | GCC 9.3 |
-
-## Steps to run RNNT
-
-### 1. Install Anaconda 3
-```
- wget https://repo.continuum.io/archive/Anaconda3-5.0.0-Linux-x86_64.sh -O anaconda3.sh
- chmod +x anaconda3.sh
- ~/anaconda3.sh -b -p ~/anaconda3
- ~/anaconda3/bin/conda create -n rnnt python=3.7
-
- export PATH=~/anaconda3/bin:$PATH
- source ~/anaconda3/bin/activate rnnt
-```
-### 2. Prepare code and environment
-```
- cd examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex
- bash prepare_env.sh
-```
-
-### 3. Install IPEX
-Refer to [intel/intel-extension-for-pytorch at mlperf/inference-1.1 (github.com)](https://github.com/intel/intel-extension-for-pytorch/tree/mlperf/inference-1.1).
-
-1. Install PyTorch 1.8 and TorchVision 0.9
-
-   Refer to the [PyTorch install](https://pytorch.org/get-started/locally/) instructions.
- ```shell position-relative
- pip3 install torch==1.8.0+cpu torchvision==0.9.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
- ```
-2. Get Intel PyTorch Extension source and install
- > **Note**
- >
- > GCC9 compiler is recommended
- >
-
- ```shell position-relative
- git clone https://github.com/intel/intel-extension-for-pytorch
- cd intel-extension-for-pytorch
- git checkout mlperf/inference-1.1
- git submodule sync
- git submodule update --init --recursive
- pip install lark-parser hypothesis
-
- python setup.py install
- ```
-
-### 4. Prepare model and dataset
-```
- work_dir=mlperf-rnnt-librispeech
- local_data_dir=$work_dir/local_data
- mkdir -p $local_data_dir
- librispeech_download_dir=.
- # prepare model
- wget https://zenodo.org/record/3662521/files/DistributedDataParallel_1576581068.9962234-epoch-100.pt?download=1 -O $work_dir/rnnt.pt
-
- # prepare inference dataset
- wget https://www.openslr.org/resources/12/dev-clean.tar.gz
- # suggest you check run.sh to locate the dataset
- python pytorch/utils/download_librispeech.py \
- pytorch/utils/librispeech-inference.csv \
- $librispeech_download_dir \
- -e $local_data_dir --skip_download
- python pytorch/utils/convert_librispeech.py \
- --input_dir $local_data_dir/LibriSpeech/dev-clean \
- --dest_dir $local_data_dir/dev-clean-wav \
- --output_json $local_data_dir/dev-clean-wav.json
-```
-
-### 5. Tune RNN-T with Neural Compressor
- Please update setup_env_offline.sh or setup_env_server.sh and user.conf according to your platform resources.
-```
- # offline
- ./run_tuning.sh --dataset_location=$local_data_dir --input_model=$work_dir/rnnt.pt
- # server scenario
- ./run_tuning.sh --dataset_location=$local_data_dir --input_model=$work_dir/rnnt.pt --server
-```
-
-### 6. Benchmark
-```
-# fp32 benchmark
-bash ./run_benchmark.sh --dataset_location=/path/to/RNN-T/dataset/LibriSpeech --input_model=rnnt.pt --mode=benchmark
-# int8+bf16 benchmark
-bash ./run_benchmark.sh --dataset_location=/path/to/RNN-T/dataset/LibriSpeech --input_model=rnnt.pt --mode=benchmark --int8=true
-# fp32 accuracy
-bash ./run_benchmark.sh --dataset_location=/path/to/RNN-T/dataset/LibriSpeech --input_model=rnnt.pt --mode=accuracy
-# int8+bf16 accuracy
-bash ./run_benchmark.sh --dataset_location=/path/to/RNN-T/dataset/LibriSpeech --input_model=rnnt.pt --mode=accuracy --int8=true
-
-```
-
-### Note on Server scenario
-
-* Only the encoder is quantized; the decoder runs with bf16 ops.
-* For the server scenario, we exploit the fact that incoming samples have different sequence lengths (and inference times) by bucketing them according to sequence length
-and specifying a batch size for each bucket such that the latency target can be met. The settings are specified in the machine.conf file; the required fields
-are cores_per_instance, num_instances, waveform_len_cutoff, and batch_size.
-
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/accuracy_eval.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/accuracy_eval.py
deleted file mode 100644
index ea81792855b..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/accuracy_eval.py
+++ /dev/null
@@ -1,53 +0,0 @@
-#!/usr/bin/env python
-
-import argparse
-import array
-import json
-import sys
-import os
-
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), "pytorch"))
-
-from QSL import AudioQSL
-from helpers import process_evaluation_epoch, __gather_predictions
-from parts.manifest import Manifest
-
-dtype_map = {
- "int8": 'b',
- "int16": 'h',
- "int32": 'l',
- "int64": 'q',
-}
-
-def get_args():
- parser = argparse.ArgumentParser()
- parser.add_argument("--log_dir", required=True)
- parser.add_argument("--dataset_dir", required=True)
- parser.add_argument("--manifest", required=True)
- parser.add_argument("--output_dtype", default="int64", choices=dtype_map.keys(), help="Output data type")
- args = parser.parse_args()
- return args
-
-def main():
- args = get_args()
- labels = [" ", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "'"]
- qsl = AudioQSL(args.dataset_dir, args.manifest, labels)
- manifest = qsl.manifest
- with open(os.path.join(args.log_dir, "mlperf_log_accuracy.json")) as fh:
- results = json.load(fh)
- hypotheses = []
- references = []
- for result in results:
- hypotheses.append(array.array(dtype_map[args.output_dtype], bytes.fromhex(result["data"])).tolist())
- references.append(manifest[result["qsl_idx"]]["transcript"])
-
- references = __gather_predictions([references], labels=labels)
- hypotheses = __gather_predictions([hypotheses], labels=labels)
-
- d = dict(predictions=hypotheses,
- transcripts=references)
- wer = process_evaluation_epoch(d)
- print("Word Error Rate: {:}%, accuracy={:}%".format(wer * 100, (1 - wer) * 100))
-
-if __name__ == '__main__':
- main()
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/conf.yaml b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/conf.yaml
deleted file mode 100644
index 56c9f2e1245..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/conf.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-version: 1.0
-
-model: # mandatory. used to specify model specific information.
- name: rnnt
- framework: pytorch_ipex # mandatory. supported values are tensorflow, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension.
-
-tuning:
- accuracy_criterion:
- relative: 0.01 # optional. default value is relative, other value is absolute. this example allows relative accuracy loss: 1%.
- exit_policy:
- timeout: 0 # optional. tuning timeout (seconds). default value is 0 which means early stop. combine with max_trials field to decide when to exit.
- random_seed: 9527 # optional. random seed for deterministic tuning.
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/configure_lstm_only_encoder.json b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/configure_lstm_only_encoder.json
deleted file mode 100644
index 1dc6dae0a85..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/configure_lstm_only_encoder.json
+++ /dev/null
@@ -1,52 +0,0 @@
-[
- {
- "id": 0,
- "name": "lstm",
- "algorithm": "min_max",
- "weight_granularity": "per_tensor",
- "inputs_scale": [
- 16.95465660095215
- ],
- "outputs_scale": [
- 16.95465660095215
- ],
- "inputs_zero_point": [
- 80
- ],
- "outputs_zero_point": [
- 80
- ],
- "inputs_uint8_used": [
- true
- ],
- "outputs_uint8_used": [
- true
- ],
- "quantized": true
- },
- {
- "id": 1,
- "name": "lstm",
- "algorithm": "min_max",
- "weight_granularity": "per_tensor",
- "inputs_scale": [
- 128.29344177246094
- ],
- "outputs_scale": [
- 128.29344177246094
- ],
- "inputs_zero_point": [
- 126
- ],
- "outputs_zero_point": [
- 126
- ],
- "inputs_uint8_used": [
- true
- ],
- "outputs_uint8_used": [
- true
- ],
- "quantized": true
- }
-]
\ No newline at end of file
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/environment.yml b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/environment.yml
deleted file mode 100644
index dfadf861c39..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/environment.yml
+++ /dev/null
@@ -1,128 +0,0 @@
-name: mlperf-rnnt
-channels:
- - pytorch
- - conda-forge
- - defaults
-dependencies:
- - _libgcc_mutex=0.1=main
- - absl-py=0.9.0=py36_0
- - blas=1.0=mkl
- - bzip2=1.0.8=h7b6447c_0
- - ca-certificates=2020.4.5.1=hecc5488_0
- - certifi=2020.4.5.1=py36h9f0ad1d_0
- - cffi=1.14.0=py36h2e261b9_0
- - cmake=3.14.0=h52cb24c_0
- - cudatoolkit=10.1.243=h6bb024c_0
- - cudatoolkit-dev=10.1.243=h516909a_3
- - expat=2.2.6=he6710b0_0
- - freetype=2.9.1=h8a8886c_1
- - gdb=8.3.1=py36h497da48_1
- - intel-openmp=2020.0=166
- - jpeg=9b=h024ee3a_2
- - krb5=1.17.1=h173b8e3_0
- - lame=3.100=h14c3975_1001
- - ld_impl_linux-64=2.33.1=h53a641e_7
- - libcurl=7.69.1=h20c2e04_0
- - libedit=3.1.20181209=hc058e9b_0
- - libffi=3.2.1=hd88cf55_4
- - libgcc-ng=9.1.0=hdf63c60_0
- - libgfortran-ng=7.3.0=hdf63c60_0
- - libpng=1.6.37=hbc83047_0
- - libssh2=1.9.0=h1ba5d50_1
- - libstdcxx-ng=9.1.0=hdf63c60_0
- - libtiff=4.1.0=h2733197_0
- - mad=0.15.1b=he1b5a44_0
- - mkl=2020.0=166
- - mkl-include=2020.0=166
- - mkl-service=2.3.0=py36he904b0f_0
- - mkl_fft=1.0.15=py36ha843d7b_0
- - mkl_random=1.1.0=py36hd6b4f25_0
- - ncurses=6.1=hf484d3e_1002
- - ninja=1.9.0=py36hfd86e86_0
- - numpy=1.18.1=py36h4f9e942_0
- - numpy-base=1.18.1=py36hde5b4d6_1
- - olefile=0.46=py_0
- - openssl=1.1.1g=h516909a_0
- - pillow=7.0.0=py36hb39fc2d_0
- - pip=20.0.2=py36_1
- - pycparser=2.20=py_0
- - python=3.6.10=h0371630_0
- - python_abi=3.6=1_cp36m
- - pytorch=1.5.0=py3.6_cuda10.1.243_cudnn7.6.3_0
- - pyyaml=5.3.1=py36h7b6447c_0
- - readline=7.0=hf8c457e_1001
- - rhash=1.3.8=h1ba5d50_0
- - setuptools=46.1.3=py36_0
- - six=1.14.0=py36_0
- - sqlite=3.31.1=h7b6447c_0
- - tk=8.6.8=hbc83047_0
- - torchvision=0.6.0=py36_cu101
- - wheel=0.34.2=py36_0
- - xz=5.2.4=h14c3975_4
- - yaml=0.1.7=had09818_2
- - zlib=1.2.11=h7b6447c_3
- - zstd=1.3.7=h0b5b093_0
- - pip:
- - ascii-graph==1.5.1
- - attrs==19.3.0
- - audioread==2.1.8
- - autopep8==1.5.1
- - backcall==0.1.0
- - chardet==3.0.4
- - coverage==5.0.4
- - decorator==4.4.2
- - entrypoints==0.3
- - flake8==3.7.9
- - grpcio==1.28.1
- - idna==2.9
- - importlib-metadata==1.6.0
- - inflect==4.1.0
- - ipdb==0.13.2
- - ipython==7.13.0
- - ipython-genutils==0.2.0
- - jedi==0.16.0
- - joblib==0.14.1
- - librosa==0.7.2
- - llvmlite==0.31.0
- - markdown==3.2.1
- - mccabe==0.6.1
- - more-itertools==8.2.0
- - numba==0.48.0
- - onnx==1.6.0
- - onnxruntime==1.2.0
- - packaging==20.3
- - pandas==0.24.2
- - parso==0.6.2
- - pexpect==4.8.0
- - pickleshare==0.7.5
- - pluggy==0.13.1
- - prompt-toolkit==3.0.5
- - protobuf==3.11.3
- - ptyprocess==0.6.0
- - py==1.8.1
- - pycodestyle==2.5.0
- - pyflakes==2.1.1
- - pygments==2.6.1
- - pyparsing==2.4.7
- - pytest==5.4.2
- - python-dateutil==2.8.1
- - pytz==2019.3
- - requests==2.23.0
- - resampy==0.2.2
- - scikit-learn==0.22.2.post1
- - scipy==1.4.1
- - soundfile==0.10.3.post1
- - sox==1.3.7
- - tensorboard==2.0.0
- - toml==0.10.0
- - tqdm==4.31.1
- - traitlets==4.3.3
- - typing-extensions==3.7.4.2
- - text-unidecode==1.3
- - urllib3==1.25.8
- - wcwidth==0.1.9
- - werkzeug==1.0.1
- - wrapt==1.10.11
- - zipp==3.1.0
-prefix: /cb/home/daniel/ws/miniconda3/envs/mlperf-rnnt
-
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/mlperf.conf-old b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/mlperf.conf-old
deleted file mode 100644
index 9546d41cf18..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/mlperf.conf-old
+++ /dev/null
@@ -1,68 +0,0 @@
-# The format of this config file is 'key = value'.
-# The key has the format 'model.scenario.key'. Value is mostly int64_t.
-# Model may be '*' as a wildcard. In that case the value applies to all models.
-# All times are in milliseconds
-
-# Set performance_sample_count for each model.
-# User can optionally set this to higher values in user.conf.
-mobilenet.*.performance_sample_count_override = 1024
-gnmt.*.performance_sample_count_override = 3903900
-resnet50.*.performance_sample_count_override = 1024
-ssd-mobilenet.*.performance_sample_count_override = 256
-ssd-resnet34.*.performance_sample_count_override = 64
-bert.*.performance_sample_count_override = 10833
-dlrm.*.performance_sample_count_override = 204800
-rnnt.*.performance_sample_count_override = 2513
-3d-unet.*.performance_sample_count_override = 16
-
-# Set seeds. The seeds will be distributed two weeks before the submission.
-# 0x168ad48ada698a73
-*.*.qsl_rng_seed = 1624344308455410291
-# 0x07303fed113b8976
-*.*.sample_index_rng_seed = 517984244576520566
-# 0x8b7e1740dacb67f9
-*.*.schedule_rng_seed = 10051496985653635065
-
-*.SingleStream.target_latency_percentile = 90
-*.SingleStream.min_duration = 600000
-*.SingleStream.min_query_count = 1024
-
-*.MultiStream.target_qps = 20
-*.MultiStream.target_latency_percentile = 99
-*.MultiStream.max_async_queries = 1
-*.MultiStream.target_latency = 50
-*.MultiStream.min_duration = 600000
-*.MultiStream.min_query_count = 270336
-ssd-resnet34.MultiStream.target_qps = 15
-ssd-resnet34.MultiStream.target_latency = 66
-gnmt.MultiStream.min_query_count = 90112
-gnmt.MultiStream.target_latency = 100
-gnmt.MultiStream.target_qps = 10
-gnmt.MultiStream.target_latency_percentile = 97
-
-*.Server.target_latency = 10
-*.Server.target_latency_percentile = 99
-*.Server.target_duration = 0
-*.Server.min_duration = 600000
-*.Server.min_query_count = 270336
-resnet50.Server.target_latency = 15
-ssd-resnet34.Server.target_latency = 100
-gnmt.Server.min_query_count = 90112
-gnmt.Server.target_latency = 250
-gnmt.Server.target_latency_percentile = 97
-bert.Server.target_latency = 130
-dlrm.Server.target_latency = 30
-rnnt.Server.target_latency = 1000
-
-*.Offline.target_latency_percentile = 90
-*.Offline.min_duration = 600000
-# In Offline scenario, we always have one query. But LoadGen maps this to
-# min_sample_count internally in Offline scenario, so set this to 24576 since
-# the rule requires that Offline scenario run for at least 24576 samples.
-*.Offline.min_query_count = 24576
-
-# These fields should be defined and overridden by user.conf.
-*.SingleStream.target_latency = 10
-*.Server.target_qps = 1.0
-*.Offline.target_qps = 1.0
-*.MultiStream.samples_per_query = 4
\ No newline at end of file
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/optional_harness_ck/README.md b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/optional_harness_ck/README.md
deleted file mode 100644
index 896cdf71635..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/optional_harness_ck/README.md
+++ /dev/null
@@ -1,303 +0,0 @@
-# MLPerf Inference - Speech Recognition - RNN-T
-
-We describe an automated and reproducible workflow for the [RNN-T
-workload](https://github.com/mlperf/inference/tree/master/v0.7/speech_recognition/rnnt)
-implemented using the [Collective Knowledge](http://cknowledge.org) technology. It automatically
-downloads the model and the dataset, preprocesses the dataset, builds the LoadGen API, etc.
-For any questions, please email info@dividiti.com or simply [open an issue](https://github.com/mlperf/inference/issues) on GitHub.
-
-**NB:** Below we give an _essential_ sequence of steps that should result in a successful setup
-of the RNN-T workflow on a minimally configured Linux system.
-
-The steps are extracted from a [minimalistic Amazon Linux
-2](https://github.com/ctuning/ck-mlperf/blob/master/docker/speech-recognition.rnnt/Dockerfile.amazonlinux.min)
-Docker image, which is derived from a more verbose [Amazon Linux
-2](https://github.com/ctuning/ck-mlperf/blob/master/docker/speech-recognition.rnnt/Dockerfile.amazonlinux)
-Docker image by omitting steps that the [Collective Knowledge
-framework](https://github.com/ctuning/ck) performs automatically.
-
-For example, installing the preprocessed dataset is explicit in the verbose image:
-```
-#-----------------------------------------------------------------------------#
-# Step 3. Download the official MLPerf Inference RNNT dataset (LibriSpeech
-# dev-clean) and preprocess it to wav.
-#-----------------------------------------------------------------------------#
-RUN ck install package --tags=dataset,speech-recognition,dev-clean,original
-# NB: Can ignore the lzma related warning.
-RUN ck install package --tags=dataset,speech-recognition,dev-clean,preprocessed
-#-----------------------------------------------------------------------------#
-```
-but is implicit in the minimalistic image:
-```
-#- #-----------------------------------------------------------------------------#
-#- # Step 3. Download the official MLPerf Inference RNNT dataset (LibriSpeech
-#- # dev-clean) and preprocess it to wav.
-#- #-----------------------------------------------------------------------------#
-#- RUN ck install package --tags=dataset,speech-recognition,dev-clean,original
-#- # NB: Can ignore the lzma related warning.
-#- RUN ck install package --tags=dataset,speech-recognition,dev-clean,preprocessed
-#- #-----------------------------------------------------------------------------#
-```
-because it's going to be triggered by a test performance run:
-```
-#+ #-----------------------------------------------------------------------------#
-#+ # Step 6. Pull all the implicit dependencies commented out in Steps 1-5.
-#+ #-----------------------------------------------------------------------------#
-RUN ck run program:speech-recognition-pytorch-loadgen --cmd_key=performance --skip_print_timers
-#+ #-----------------------------------------------------------------------------#
-```
-(Omitted steps are commented out with `#- `. Added steps are commented with `#+ `.)
-
-For other possible variations and workarounds see the [complete
-collection](https://github.com/ctuning/ck-mlperf/blob/master/docker/speech-recognition.rnnt/README.md)
-of Docker images for this workflow including Ubuntu, Debian and CentOS.
-
-# Table of Contents
-
-1. [Installation](#install)
- 1. Install [system-wide prerequisites](#install_system)
- 1. [Ubuntu 20.04 or similar](#install_system_ubuntu)
- 1. [CentOS 7 or similar](#install_system_centos_7)
- 1. [CentOS 8 or similar](#install_system_centos_8)
- 1. Install [Collective Knowledge](#install_ck) (CK) and its repositories
- 1. Detect [GCC](#detect_gcc)
- 1. Detect [Python](#detect_python)
- 1. Install [Python dependencies](#install_python_deps)
- 1. Install a branch of the [MLPerf Inference](#install_inference_repo) repo
-1. [Usage](#usage)
- 1. [Performance](#usage_performance)
-    1. [Accuracy](#usage_accuracy)
-
-
-## Installation
-
-
-### Install system-wide prerequisites
-
-**NB:** Run the below commands for your Linux system with `sudo` or as superuser.
-
-
-#### Ubuntu 20.04 or similar
-```bash
-$ sudo apt update -y
-$ sudo apt install -y apt-utils
-$ sudo apt upgrade -y
-$ sudo apt install -y\
- python3 python3-pip\
- gcc g++\
- make patch vim\
- git wget zip libz-dev\
- libsndfile1-dev
-$ sudo apt clean
-```
-
-
-#### CentOS 7 or similar
-```bash
-$ sudo yum upgrade -y
-$ sudo yum install -y\
- python3 python3-pip python3-devel\
- gcc gcc-c++\
- make which patch vim\
- git wget zip unzip\
- tar xz\
- libsndfile-devel
-$ sudo yum clean all
-```
-
-
-#### CentOS 8 or similar
-```bash
-$ sudo yum upgrade -y
-$ sudo yum install -y\
- gcc gcc-c++\
- make which patch vim\
- git wget zip unzip\
- openssl-devel bzip2-devel libffi-devel
-$ sudo yum clean all
-$ sudo dnf install -y python3 python3-pip python3-devel
-$ sudo dnf --enablerepo=PowerTools install -y libsndfile-devel
-```
-
-
-
-### Install [Collective Knowledge](http://cknowledge.org/) (CK) and its repositories
-
-```bash
-$ export CK_PYTHON=/usr/bin/python3
-$ $CK_PYTHON -m pip install --ignore-installed pip setuptools --user
-$ $CK_PYTHON -m pip install ck
-$ ck version
-V1.15.0
-$ ck pull repo:ck-mlperf
-$ ck pull repo:ck-pytorch
-```
-
-
-### Detect (system) GCC
-```
-$ export CK_CC=/usr/bin/gcc
-$ ck detect soft:compiler.gcc --full_path=$CK_CC
-$ ck show env --tags=compiler,gcc
-Env UID: Target OS: Bits: Name: Version: Tags:
-
-b8bd7b49f72f9794 linux-64 64 GNU C compiler 7.3.1 64bits,compiler,gcc,host-os-linux-64,lang-c,lang-cpp,target-os-linux-64,v7,v7.3,v7.3.1
-```
-**NB:** Required to build the FLAC and SoX dependencies of preprocessing. CK can normally detect compilers automatically, but we are playing safe here.
-
-
-### Detect (system) Python
-```
-$ export CK_PYTHON=/usr/bin/python3
-$ ck detect soft:compiler.python --full_path=$CK_PYTHON
-$ ck show env --tags=compiler,python
-Env UID: Target OS: Bits: Name: Version: Tags:
-
-633a6b22205eb07f linux-64 64 python 3.7.6 64bits,compiler,host-os-linux-64,lang-python,python,target-os-linux-64,v3,v3.7,v3.7.6
-```
-**NB:** CK can normally detect available Python interpreters automatically, but we are playing safe here.
-
-
-### Install Python dependencies (in userspace)
-
-#### Install implicit dependencies via pip
-```bash
-$ export CK_PYTHON=/usr/bin/python3
-$ $CK_PYTHON -m pip install --user --upgrade \
- tqdm wheel toml unidecode inflect sndfile librosa numba==0.48
-...
-Successfully installed inflect-4.1.0 librosa-0.7.2 llvmlite-0.31.0 numba-0.48.0 sndfile-0.2.0 unidecode-1.1.1 wheel-0.34.2
-```
-**NB:** These dependencies are _implicit_, i.e. CK will not try to satisfy them. If they are not installed, however, the workflow will fail.
-
-
-#### Install explicit dependencies via CK (also via `pip`, but register with CK at the same time)
-```bash
-$ ck install package --tags=python-package,torch
-$ ck install package --tags=python-package,pandas
-$ ck install package --tags=python-package,sox
-$ ck install package --tags=python-package,absl
-```
-**NB:** These dependencies are _explicit_, i.e. CK will try to satisfy them automatically. On a machine with multiple versions of Python, things can get messy, so we are playing safe here.
-
-
-### Install an MLPerf Inference [branch](https://github.com/dividiti/inference/tree/dvdt-rnnt) with [dividiti](http://dividiti.com)'s tweaks for RNN-T
-```bash
-$ ck install package --tags=mlperf,inference,source,dividiti.rnnt
-```
-**NB:** This source will be used for building LoadGen as well.
-
-
-
-## Usage
-
-
-### Running a performance test
-
-The first run will end up resolving all the remaining explicit dependencies:
-- preprocessing the LibriSpeech Dev-Clean dataset to wav;
-- building the LoadGen API;
-- downloading the PyTorch model.
-
-It's a performance run which should print something like:
-```
-$ ck run program:speech-recognition-pytorch-loadgen --cmd_key=performance --skip_print_timers
-...
-Dataset loaded with 4.36 hours. Filtered 1.02 hours. Number of samples: 2513
-Running Loadgen test...
-Average latency (ms) per query:
-7335.167247106061
-Median latency (ms):
-7391.662108
-90 percentile latency (ms):
-13347.925176
-================================================
-MLPerf Results Summary
-================================================
-SUT name : PySUT
-Scenario : Offline
-Mode : Performance
-Samples per second: 4.63626
-Result is : INVALID
- Min duration satisfied : NO
- Min queries satisfied : Yes
-Recommendations:
- * Increase expected QPS so the loadgen pre-generates a larger (coalesced) query.
-
-================================================
-Additional Stats
-================================================
-Min latency (ns) : 278432559
-Max latency (ns) : 14235613054
-Mean latency (ns) : 7335167247
-50.00 percentile latency (ns) : 7521181269
-90.00 percentile latency (ns) : 13402430910
-95.00 percentile latency (ns) : 13723706550
-97.00 percentile latency (ns) : 14054764438
-99.00 percentile latency (ns) : 14235613054
-99.90 percentile latency (ns) : 14235613054
-
-================================================
-Test Parameters Used
-================================================
-samples_per_query : 66
-target_qps : 1
-target_latency (ns): 0
-max_async_queries : 1
-min_duration (ms): 60000
-max_duration (ms): 0
-min_query_count : 1
-max_query_count : 0
-qsl_rng_seed : 3133965575612453542
-sample_index_rng_seed : 665484352860916858
-schedule_rng_seed : 3622009729038561421
-accuracy_log_rng_seed : 0
-accuracy_log_probability : 0
-print_timestamps : false
-performance_issue_unique : false
-performance_issue_same : false
-performance_issue_same_index : 0
-performance_sample_count : 2513
-
-No warnings encountered during test.
-
-No errors encountered during test.
-Done!
-
-Execution time: 38.735 sec.
-```
-
-The above output is the contents of `mlperf_log_summary.txt`, one of the log files generated by LoadGen. All LoadGen log files can be located in the program's temporary directory:
-```bash
-$ cd `ck find program:speech-recognition-pytorch-loadgen`/tmp && ls -la mlperf_log_*
--rw-r--r-- 1 anton eng 4 Jul 3 18:06 mlperf_log_accuracy.json
--rw-r--r-- 1 anton eng 20289 Jul 3 18:06 mlperf_log_detail.txt
--rw-r--r-- 1 anton eng 1603 Jul 3 18:06 mlperf_log_summary.txt
--rw-r--r-- 1 anton eng 860442 Jul 3 18:06 mlperf_log_trace.json
-```
-
-
-### Running an accuracy test
-
-```
-$ ck run program:speech-recognition-pytorch-loadgen --cmd_key=accuracy --skip_print_timers
-...
-Dataset loaded with 4.36 hours. Filtered 1.02 hours. Number of samples: 2513
-Running Loadgen test...
-
-No warnings encountered during test.
-
-No errors encountered during test.
-Running accuracy script: /usr/bin/python3 /disk1/homes/anton/CK-TOOLS/mlperf-inference-dividiti.rnnt/inference/v0.7/speech_recognition/rnnt/accuracy_eval.py --log_dir /disk1/homes/anton/CK/ck-mlperf/program/speech-recognition-pytorch-loadgen/tmp --dataset_dir /homes/anton/CK-TOOLS/dataset-librispeech-preprocessed-to-wav-dev-clean/../ --manifest /homes/anton/CK-TOOLS/dataset-librispeech-preprocessed-to-wav-dev-clean/wav-list.json
-Dataset loaded with 4.36 hours. Filtered 1.02 hours. Number of samples: 2513
-Word Error Rate: 0.07452253714852645
-Done!
-
-Execution time: 502.197 sec.
-
-$ cd `ck find program:speech-recognition-pytorch-loadgen`/tmp && ls -la mlperf_log_*
--rw-r--r-- 1 anton eng 3862427 Jul 3 18:00 mlperf_log_accuracy.json
--rw-r--r-- 1 anton eng 20126 Jul 3 18:00 mlperf_log_detail.txt
--rw-r--r-- 1 anton eng 74 Jul 3 18:00 mlperf_log_summary.txt
--rw-r--r-- 1 anton eng 29738248 Jul 3 18:00 mlperf_log_trace.json
-```
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/prepare_env.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/prepare_env.sh
deleted file mode 100644
index 4929243a6fe..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/prepare_env.sh
+++ /dev/null
@@ -1,67 +0,0 @@
- #set -eo pipefail
- set -x
-
- WORKDIR=`pwd`
-
- PATTERN='[-a-zA-Z0-9_]*='
- if [ $# -lt "0" ] ; then
- echo 'ERROR:'
- printf 'Please use following parameters:
- --code=
- '
- exit 1
- fi
-
- for i in "$@"
- do
- case $i in
- --code=*)
- code=`echo $i | sed "s/${PATTERN}//"`;;
- *)
- echo "Parameter $i not recognized."; exit 1;;
- esac
- done
-
- if [ -d $code ];then
- REPODIR=$code
- fi
-
- echo "Install dependencies"
- pip install sklearn onnx tqdm lark-parser
- pip install -e git+https://github.com/mlperf/logging@0.7.0-rc2#egg=mlperf-logging
- conda install ninja pyyaml setuptools cmake cffi typing --yes
- conda install numpy=1.21.5 --yes
- conda install intel-openmp mkl mkl-include --no-update-deps --yes
- conda install -c conda-forge gperftools --yes
- conda install jemalloc=5.0.1 --yes
- pip install opencv-python absl-py opencv-python-headless intel-openmp
-
- echo "Install libraries"
- mkdir $WORKDIR/local
- export install_dir=$WORKDIR/local
- cd $WORKDIR && mkdir third_party
- wget https://ftp.osuosl.org/pub/xiph/releases/flac/flac-1.3.2.tar.xz -O third_party/flac-1.3.2.tar.xz
- cd third_party && tar xf flac-1.3.2.tar.xz && cd flac-1.3.2
- ./configure --prefix=$install_dir && make && make install
-
- cd $WORKDIR
- wget https://sourceforge.net/projects/sox/files/sox/14.4.2/sox-14.4.2.tar.gz -O third_party/sox-14.4.2.tar.gz
- cd third_party && tar zxf sox-14.4.2.tar.gz && cd sox-14.4.2
- LDFLAGS="-L${install_dir}/lib" CFLAGS="-I${install_dir}/include" ./configure --prefix=$install_dir --with-flac && make && make install
-
- cd $WORKDIR
- wget http://www.mega-nerd.com/libsndfile/files/libsndfile-1.0.28.tar.gz -O third_party/libsndfile-1.0.28.tar.gz
- cd third_party && tar zxf libsndfile-1.0.28.tar.gz && cd libsndfile-1.0.28
- ./configure --prefix=$install_dir && make && make install
-
- echo "Install pytorch/ipex"
- export LD_LIBRARY_PATH=$WORKDIR/local/lib:$LD_LIBRARY_PATH
-
- cd $WORKDIR
- bash prepare_loadgen.sh ${WORKDIR}
-
- echo "Install dependencies for pytorch_SUT.py"
- pip install toml text-unidecode inflect
- pip install librosa==0.8.1
-
- set +x
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/prepare_loadgen.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/prepare_loadgen.sh
deleted file mode 100644
index 5ca666b39c6..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/prepare_loadgen.sh
+++ /dev/null
@@ -1,25 +0,0 @@
-WORKDIR=$1
-pushd .
-cd $WORKDIR
-echo Current directory is $PWD
-echo Using gcc=`which gcc`
-echo "GCC version should be >= 9"
-gcc --version
-CC=`which gcc`
-
-# install pytorch
-echo "Install pytorch/ipex"
-export LD_LIBRARY_PATH=$WORKDIR/local/lib:$LD_LIBRARY_PATH
-CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname $(which conda))/../"}
-
-cd $WORKDIR
-echo "Install loadgen"
-git clone --recurse-submodules https://github.com/mlcommons/inference.git mlperf_inference
-cd mlperf_inference
-git checkout r1.1
-git log -1
-git submodule update --init --recursive
-cd loadgen
-CFLAGS="-std=c++14" python setup.py install
-
-popd
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/Dockerfile b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/Dockerfile
deleted file mode 100644
index 1cb52bf6261..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/Dockerfile
+++ /dev/null
@@ -1,46 +0,0 @@
-# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-ARG FROM_IMAGE_NAME=nvcr.io/nvidia/pytorch:19.09-py3
-FROM ${FROM_IMAGE_NAME}
-
-
-RUN apt-get update && apt-get install -y libsndfile1 && apt-get install -y sox && rm -rf /var/lib/apt/lists/*
-
-RUN COMMIT_SHA=c6d12f9e1562833c2b4e7ad84cb22aa4ba31d18c && \
- git clone https://github.com/HawkAaron/warp-transducer deps/warp-transducer && \
- cd deps/warp-transducer && \
- git checkout $COMMIT_SHA && \
- mkdir build && \
- cd build && \
- cmake .. && \
- make VERBOSE=1 && \
- export CUDA_HOME="/usr/local/cuda" && \
- export WARP_RNNT_PATH=`pwd` && \
- export CUDA_TOOLKIT_ROOT_DIR=$CUDA_HOME && \
- export LD_LIBRARY_PATH="$CUDA_HOME/extras/CUPTI/lib64:$LD_LIBRARY_PATH" && \
- export LIBRARY_PATH=$CUDA_HOME/lib64:$LIBRARY_PATH && \
- export LD_LIBRARY_PATH=$CUDA_HOME/lib64:$LD_LIBRARY_PATH && \
- export CFLAGS="-I$CUDA_HOME/include $CFLAGS" && \
- cd ../pytorch_binding && \
- python3 setup.py install --user && \
- rm -rf ../tests test ../tensorflow_binding && \
- cd ../../..
-
-WORKDIR /workspace/jasper
-
-COPY requirements.txt .
-RUN pip install --disable-pip-version-check -U -r requirements.txt
-
-COPY . .
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/LICENSE b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/LICENSE
deleted file mode 100644
index 75ee157cd96..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/LICENSE
+++ /dev/null
@@ -1,204 +0,0 @@
- Except where otherwise noted, the following license applies to all files in this repo.
-
- Apache License
- Version 2.0, January 2004
- http://www.apache.org/licenses/
-
- TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
- 1. Definitions.
-
- "License" shall mean the terms and conditions for use, reproduction,
- and distribution as defined by Sections 1 through 9 of this document.
-
- "Licensor" shall mean the copyright owner or entity authorized by
- the copyright owner that is granting the License.
-
- "Legal Entity" shall mean the union of the acting entity and all
- other entities that control, are controlled by, or are under common
- control with that entity. For the purposes of this definition,
- "control" means (i) the power, direct or indirect, to cause the
- direction or management of such entity, whether by contract or
- otherwise, or (ii) ownership of fifty percent (50%) or more of the
- outstanding shares, or (iii) beneficial ownership of such entity.
-
- "You" (or "Your") shall mean an individual or Legal Entity
- exercising permissions granted by this License.
-
- "Source" form shall mean the preferred form for making modifications,
- including but not limited to software source code, documentation
- source, and configuration files.
-
- "Object" form shall mean any form resulting from mechanical
- transformation or translation of a Source form, including but
- not limited to compiled object code, generated documentation,
- and conversions to other media types.
-
- "Work" shall mean the work of authorship, whether in Source or
- Object form, made available under the License, as indicated by a
- copyright notice that is included in or attached to the work
- (an example is provided in the Appendix below).
-
- "Derivative Works" shall mean any work, whether in Source or Object
- form, that is based on (or derived from) the Work and for which the
- editorial revisions, annotations, elaborations, or other modifications
- represent, as a whole, an original work of authorship. For the purposes
- of this License, Derivative Works shall not include works that remain
- separable from, or merely link (or bind by name) to the interfaces of,
- the Work and Derivative Works thereof.
-
- "Contribution" shall mean any work of authorship, including
- the original version of the Work and any modifications or additions
- to that Work or Derivative Works thereof, that is intentionally
- submitted to Licensor for inclusion in the Work by the copyright owner
- or by an individual or Legal Entity authorized to submit on behalf of
- the copyright owner. For the purposes of this definition, "submitted"
- means any form of electronic, verbal, or written communication sent
- to the Licensor or its representatives, including but not limited to
- communication on electronic mailing lists, source code control systems,
- and issue tracking systems that are managed by, or on behalf of, the
- Licensor for the purpose of discussing and improving the Work, but
- excluding communication that is conspicuously marked or otherwise
- designated in writing by the copyright owner as "Not a Contribution."
-
- "Contributor" shall mean Licensor and any individual or Legal Entity
- on behalf of whom a Contribution has been received by Licensor and
- subsequently incorporated within the Work.
-
- 2. Grant of Copyright License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- copyright license to reproduce, prepare Derivative Works of,
- publicly display, publicly perform, sublicense, and distribute the
- Work and such Derivative Works in Source or Object form.
-
- 3. Grant of Patent License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- (except as stated in this section) patent license to make, have made,
- use, offer to sell, sell, import, and otherwise transfer the Work,
- where such license applies only to those patent claims licensable
- by such Contributor that are necessarily infringed by their
- Contribution(s) alone or by combination of their Contribution(s)
- with the Work to which such Contribution(s) was submitted. If You
- institute patent litigation against any entity (including a
- cross-claim or counterclaim in a lawsuit) alleging that the Work
- or a Contribution incorporated within the Work constitutes direct
- or contributory patent infringement, then any patent licenses
- granted to You under this License for that Work shall terminate
- as of the date such litigation is filed.
-
- 4. Redistribution. You may reproduce and distribute copies of the
- Work or Derivative Works thereof in any medium, with or without
- modifications, and in Source or Object form, provided that You
- meet the following conditions:
-
- (a) You must give any other recipients of the Work or
- Derivative Works a copy of this License; and
-
- (b) You must cause any modified files to carry prominent notices
- stating that You changed the files; and
-
- (c) You must retain, in the Source form of any Derivative Works
- that You distribute, all copyright, patent, trademark, and
- attribution notices from the Source form of the Work,
- excluding those notices that do not pertain to any part of
- the Derivative Works; and
-
- (d) If the Work includes a "NOTICE" text file as part of its
- distribution, then any Derivative Works that You distribute must
- include a readable copy of the attribution notices contained
- within such NOTICE file, excluding those notices that do not
- pertain to any part of the Derivative Works, in at least one
- of the following places: within a NOTICE text file distributed
- as part of the Derivative Works; within the Source form or
- documentation, if provided along with the Derivative Works; or,
- within a display generated by the Derivative Works, if and
- wherever such third-party notices normally appear. The contents
- of the NOTICE file are for informational purposes only and
- do not modify the License. You may add Your own attribution
- notices within Derivative Works that You distribute, alongside
- or as an addendum to the NOTICE text from the Work, provided
- that such additional attribution notices cannot be construed
- as modifying the License.
-
- You may add Your own copyright statement to Your modifications and
- may provide additional or different license terms and conditions
- for use, reproduction, or distribution of Your modifications, or
- for any such Derivative Works as a whole, provided Your use,
- reproduction, and distribution of the Work otherwise complies with
- the conditions stated in this License.
-
- 5. Submission of Contributions. Unless You explicitly state otherwise,
- any Contribution intentionally submitted for inclusion in the Work
- by You to the Licensor shall be under the terms and conditions of
- this License, without any additional terms or conditions.
- Notwithstanding the above, nothing herein shall supersede or modify
- the terms of any separate license agreement you may have executed
- with Licensor regarding such Contributions.
-
- 6. Trademarks. This License does not grant permission to use the trade
- names, trademarks, service marks, or product names of the Licensor,
- except as required for reasonable and customary use in describing the
- origin of the Work and reproducing the content of the NOTICE file.
-
- 7. Disclaimer of Warranty. Unless required by applicable law or
- agreed to in writing, Licensor provides the Work (and each
- Contributor provides its Contributions) on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
- implied, including, without limitation, any warranties or conditions
- of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
- PARTICULAR PURPOSE. You are solely responsible for determining the
- appropriateness of using or redistributing the Work and assume any
- risks associated with Your exercise of permissions under this License.
-
- 8. Limitation of Liability. In no event and under no legal theory,
- whether in tort (including negligence), contract, or otherwise,
- unless required by applicable law (such as deliberate and grossly
- negligent acts) or agreed to in writing, shall any Contributor be
- liable to You for damages, including any direct, indirect, special,
- incidental, or consequential damages of any character arising as a
- result of this License or out of the use or inability to use the
- Work (including but not limited to damages for loss of goodwill,
- work stoppage, computer failure or malfunction, or any and all
- other commercial damages or losses), even if such Contributor
- has been advised of the possibility of such damages.
-
- 9. Accepting Warranty or Additional Liability. While redistributing
- the Work or Derivative Works thereof, You may choose to offer,
- and charge a fee for, acceptance of support, warranty, indemnity,
- or other liability obligations and/or rights consistent with this
- License. However, in accepting such obligations, You may act only
- on Your own behalf and on Your sole responsibility, not on behalf
- of any other Contributor, and only if You agree to indemnify,
- defend, and hold each Contributor harmless for any liability
- incurred by, or claims asserted against, such Contributor by reason
- of your accepting any such warranty or additional liability.
-
- END OF TERMS AND CONDITIONS
-
- APPENDIX: How to apply the Apache License to your work.
-
- To apply the Apache License to your work, attach the following
- boilerplate notice, with the fields enclosed by brackets "[]"
- replaced with your own identifying information. (Don't include
- the brackets!) The text should be enclosed in the appropriate
- comment syntax for the file format. We also recommend that a
- file or class name and description of purpose be included on the
- same "printed page" as the copyright notice for easier
- identification within third-party archives.
-
- Copyright 2019 NVIDIA Corporation
- Copyright 2019 Myrtle Software Limited, www.myrtle.ai
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/NOTICE b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/NOTICE
deleted file mode 100644
index 7916839bcc4..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/NOTICE
+++ /dev/null
@@ -1,5 +0,0 @@
-Jasper in PyTorch
-
-This repository includes source code (in "parts/") from:
-* https://github.com/keithito/tacotron and https://github.com/ryanleary/patter licensed under MIT license.
-
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/configs/rnnt.toml b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/configs/rnnt.toml
deleted file mode 100644
index a4cd1dfb470..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/configs/rnnt.toml
+++ /dev/null
@@ -1,77 +0,0 @@
-# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
-# Copyright (c) 2019, Myrtle Software Limited. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-model = "RNNT"
-
-[input]
-normalize = "per_feature"
-sample_rate = 16000
-window_size = 0.02
-window_stride = 0.01
-window = "hann"
-features = 80
-n_fft = 512
-frame_splicing = 3
-dither = 0.00001
-feat_type = "logfbank"
-normalize_transcripts = true
-trim_silence = true
-pad_to = 0 # TODO
-max_duration = 16.7
-speed_perturbation = true
-
-
-cutout_rect_regions = 0
-cutout_rect_time = 60
-cutout_rect_freq = 25
-
-
-cutout_x_regions = 2
-cutout_y_regions = 2
-cutout_x_width = 6
-cutout_y_width = 6
-
-
-[input_eval]
-normalize = "per_feature"
-sample_rate = 16000
-window_size = 0.02
-window_stride = 0.01
-window = "hann"
-features = 80
-n_fft = 512
-frame_splicing = 3
-dither = 0.00001
-feat_type = "logfbank"
-normalize_transcripts = true
-trim_silence = true
-pad_to = 0
-
-
-[rnnt]
-rnn_type = "lstm"
-encoder_n_hidden = 1024
-encoder_pre_rnn_layers = 2
-encoder_stack_time_factor = 2
-encoder_post_rnn_layers = 3
-pred_n_hidden = 320
-pred_rnn_layers = 2
-forget_gate_bias = 1.0
-joint_n_hidden = 512
-dropout=0.32
-
-
-[labels]
-labels = [" ", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "'"]
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/dataset.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/dataset.py
deleted file mode 100644
index 7b9036f1c55..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/dataset.py
+++ /dev/null
@@ -1,159 +0,0 @@
-# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""
-This file contains classes and functions related to data loading
-"""
-from collections import namedtuple
-import torch
-import numpy as np
-from torch.utils.data import Dataset
-from parts.manifest import Manifest
-from parts.features import WaveformFeaturizer
-
-
-def seq_collate_fn(batch):
- """batches samples and returns as tensors
- Args:
- batch : list of samples
- Returns
- batches of tensors
- """
- audio_lengths = torch.LongTensor([sample.waveform.size(0)
- for sample in batch])
- transcript_lengths = torch.LongTensor([sample.transcript.size(0)
- for sample in batch])
- permute_indices = torch.argsort(audio_lengths, descending=True)
-
- audio_lengths = audio_lengths[permute_indices]
- transcript_lengths = transcript_lengths[permute_indices]
- padded_audio_signals = torch.nn.utils.rnn.pad_sequence(
- [batch[i].waveform for i in permute_indices],
- batch_first=True
- )
- transcript_list = [batch[i].transcript
- for i in permute_indices]
- packed_transcripts = torch.nn.utils.rnn.pack_sequence(transcript_list,
- enforce_sorted=False)
-
- # TODO: Don't I need to stop grad at some point now?
- return (padded_audio_signals, audio_lengths, transcript_list,
- packed_transcripts, transcript_lengths)
-
-
-class AudioToTextDataLayer:
- """Data layer with data loader
- """
-
- def __init__(self, **kwargs):
- featurizer_config = kwargs['featurizer_config']
- pad_to_max = kwargs.get('pad_to_max', False)
- perturb_config = kwargs.get('perturb_config', None)
- manifest_filepath = kwargs['manifest_filepath']
- dataset_dir = kwargs['dataset_dir']
- labels = kwargs['labels']
- batch_size = kwargs['batch_size']
- drop_last = kwargs.get('drop_last', False)
- shuffle = kwargs.get('shuffle', True)
- min_duration = featurizer_config.get('min_duration', 0.1)
- max_duration = featurizer_config.get('max_duration', None)
- normalize_transcripts = kwargs.get('normalize_transcripts', True)
- trim_silence = kwargs.get('trim_silence', False)
- sampler_type = kwargs.get('sampler', 'default')
- speed_perturbation = featurizer_config.get('speed_perturbation', False)
- sort_by_duration = sampler_type == 'bucket'
- self._featurizer = WaveformFeaturizer.from_config(
- featurizer_config, perturbation_configs=perturb_config)
- self._dataset = AudioDataset(
- dataset_dir=dataset_dir,
- manifest_filepath=manifest_filepath,
- labels=labels, blank_index=len(labels),
- sort_by_duration=sort_by_duration,
- pad_to_max=pad_to_max,
- featurizer=self._featurizer, max_duration=max_duration,
- min_duration=min_duration, normalize=normalize_transcripts,
- trim=trim_silence, speed_perturbation=speed_perturbation)
-
- print('sort_by_duration', sort_by_duration)
-
- self._dataloader = torch.utils.data.DataLoader(
- dataset=self._dataset,
- batch_size=batch_size,
- collate_fn=lambda b: seq_collate_fn(b),
- drop_last=drop_last,
- shuffle=shuffle,
- num_workers=0,
- pin_memory=True,
- sampler=None
- )
-
- def __len__(self):
- return len(self._dataset)
-
- @property
- def data_iterator(self):
- return self._dataloader
-
-
-class AudioDataset(Dataset):
- def __init__(self, dataset_dir, manifest_filepath, labels, featurizer, max_duration=None, pad_to_max=False,
- min_duration=None, blank_index=0, max_utts=0, normalize=True, sort_by_duration=False,
- trim=False, speed_perturbation=False):
- """Dataset that loads tensors via a json file containing paths to audio files, transcripts, and durations
- (in seconds). Each entry is a different audio sample.
- Args:
- dataset_dir: absolute path to dataset folder
- manifest_filepath: relative path from dataset folder to manifest json as described above.
- labels: String containing all the possible characters to map to
- featurizer: Initialized featurizer class that converts paths of audio to feature tensors
- max_duration: If audio exceeds this length, do not include in dataset
- min_duration: If audio is less than this length, do not include in dataset
-            pad_to_max: if specified, input sequences to the DNN model will be padded to max_duration
- blank_index: blank index for ctc loss / decoder
- max_utts: Limit number of utterances
- normalize: whether to normalize transcript text
- sort_by_duration: whether or not to sort sequences by increasing duration
-            trim: if specified, trims leading and trailing silence from an audio signal.
-            speed_perturbation: specify whether the data contains speed perturbation
- """
- m_paths = [manifest_filepath]
- self.manifest = Manifest(dataset_dir, m_paths, labels, blank_index, pad_to_max=pad_to_max,
- max_duration=max_duration,
- sort_by_duration=sort_by_duration,
- min_duration=min_duration, max_utts=max_utts,
- normalize=normalize, speed_perturbation=speed_perturbation)
- self.featurizer = featurizer
- self.blank_index = blank_index
- self.trim = trim
- print(
- "Dataset loaded with {0:.2f} hours. Filtered {1:.2f} hours.".format(
- self.manifest.duration / 3600,
- self.manifest.filtered_duration / 3600))
-
- def __getitem__(self, index):
- sample = self.manifest[index]
- rn_indx = np.random.randint(len(sample['audio_filepath']))
- duration = sample['audio_duration'][rn_indx] if 'audio_duration' in sample else 0
- offset = sample['offset'] if 'offset' in sample else 0
- features = self.featurizer.process(sample['audio_filepath'][rn_indx],
- offset=offset, duration=duration,
- trim=self.trim)
-
- AudioSample = namedtuple('AudioSample', ['waveform',
- 'transcript'])
- return AudioSample(features,
- torch.LongTensor(sample["transcript"]))
-
- def __len__(self):
- return len(self.manifest)
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/decoders.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/decoders.py
deleted file mode 100644
index 9ac9fa61aaf..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/decoders.py
+++ /dev/null
@@ -1,405 +0,0 @@
-# Copyright (c) 2019, Myrtle Software Limited. All rights reserved.
-# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from typing import List, Optional, Tuple
-
-import torch
-import time
-
-import torch.nn.functional as F
-from model_separable_rnnt import label_collate
-
-class ScriptGreedyDecoder(torch.nn.Module):
- """A greedy transducer decoder.
-
- Args:
- blank_symbol: See `Decoder`.
- model: Model to use for prediction.
- max_symbols_per_step: The maximum number of symbols that can be added
- to a sequence in a single time step; if set to None then there is
- no limit.
- cutoff_prob: Skip to next step in search if current highest character
- probability is less than this.
- """
-
- def __init__(self, blank_index, model, max_symbols_per_step=30):
- super().__init__()
- #assert isinstance(model, torch.jit.ScriptModule)
- # assert not model.training
- self.eval()
- self._model = model
- self._blank_id = blank_index
- self._SOS = -1
- assert max_symbols_per_step > 0
- self._max_symbols_per_step = max_symbols_per_step
-
- @torch.jit.export
- def forward_dec_single_batch(self, logits: torch.Tensor, logits_lens: torch.Tensor, int8, bf16) -> List[List[int]]:
- """Returns a list of sentences given an input batch.
-
- Args:
- logits: logits produced by encoder
- logits_lens: length of each logits
-
- Returns:
- list containing batch number of sentences (strings).
- """
- import intel_pytorch_extension as ipex
- logits = logits.to(ipex.DEVICE)
- if int8:
- if bf16:
- # enable bf16 for decoder part
- ipex.enable_auto_mixed_precision(mixed_dtype=torch.bfloat16)
- else:
- # the case of int8 = False and bf16 = True has already been handled at a higher level
- pass
-
- # inseq: TxBxF
- logitlen = logits_lens[0]
- sentence = self._greedy_decode(logits, logitlen)
-
- return [sentence]
-
- @torch.jit.export
- def forward_single_batch(self, x: torch.Tensor, out_lens: torch.Tensor, conf, int8, bf16, run_mode="inference") -> Tuple[torch.Tensor, torch.Tensor, List[List[int]], float, float]:
- """Returns a list of sentences given an input batch.
-
- Args:
- x: A tensor of size (batch, channels, features, seq_len)
- TODO was (seq_len, batch, in_features).
- out_lens: list of int representing the length of each sequence
- output sequence.
-
- Returns:
- list containing batch number of sentences (strings).
- """
- # Apply optional preprocessing
-
- t0 = time.time()
- if int8:
- import intel_pytorch_extension as ipex
- with ipex.AutoMixPrecision(conf, running_mode=run_mode):
- logits, logits_lens = self._model.encoder(x, out_lens)
-
- # TODO: support reordering data directly from int8 to bf16
- # This is a workaround: transfer logits to the CPU
- # to reorder the data from int8 to fp32
- logits = logits.to("cpu")
- logits = logits.to(ipex.DEVICE)
-
- if bf16:
- # enable bf16 for decoder part
- ipex.enable_auto_mixed_precision(mixed_dtype=torch.bfloat16)
- else:
- # the case of int8 = False and bf16 = True has already been handled at a higher level
- logits, logits_lens = self._model.encoder(x, out_lens)
-
- #os.environ['OMP_NUM_THREADS'] = '1'
- t1 = time.time()
- # inseq: TxBxF
- logitlen = logits_lens[0]
- sentence = self._greedy_decode(logits, logitlen)
- t2 = time.time()
-
- return logits, logits_lens, [sentence], t1-t0, t2-t1
-
- def _greedy_decode(self, x: torch.Tensor, out_len: torch.Tensor) -> List[int]:
- hidden: Optional[Tuple[torch.Tensor, torch.Tensor]] = None
- label: List[int] = []
- timesteps = int(out_len.item())
- last_symb = self._SOS
- time_idx = 0
- x.unsqueeze_(0)
-
- symb_added = 0
- while True:
- g, hidden_prime = self._pred_step(last_symb, hidden)
- logp = self._joint_step_nolog(x[:, :, time_idx, :], g)
-
- # get index k, of max prob
- _, k = logp.max(0)
- k = k.item()
-
- if k == self._blank_id or symb_added >= self._max_symbols_per_step:
- time_idx += 1
- if time_idx >= timesteps:
- break
- symb_added = 0
- else:
- last_symb = k
- label.append(k)
- symb_added += 1
- hidden = hidden_prime
-
- return label
-
- """
- def _greedy_decode_origin(self, x: torch.Tensor, out_len: torch.Tensor) -> List[int]:
- hidden: Optional[Tuple[torch.Tensor, torch.Tensor]] = None
- label: List[int] = []
- for time_idx in range(int(out_len.item())):
- f = x[:, time_idx, :].unsqueeze_(0)
-
- not_blank = True
- symbols_added = 0
-
- while not_blank and symbols_added < self._max_symbols_per_step:
- g, hidden_prime = self._pred_step(
- self._get_last_symb(label),
- hidden
- )
- logp = self._joint_step(f, g, log_normalize=False)[0, :]
-
- # get index k, of max prob
- v, k = logp.max(0)
- k = k.item()
-
- if k == self._blank_id:
- not_blank = False
- else:
- label.append(k)
- hidden = hidden_prime
- symbols_added += 1
-
- return label
- """
-
- def _pred_step(self, label: int, hidden: Optional[Tuple[torch.Tensor, torch.Tensor]]) -> Tuple[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
- #if label > self._blank_id:
- # label -= 1
- label = torch.tensor([[label]], dtype=torch.int64)
- result = self._model.prediction(label, hidden)
- return result
-
- def _joint_step_nolog(self, enc: torch.Tensor, pred: torch.Tensor) -> torch.Tensor:
- return self._model.joint(enc, pred)[0, 0, 0, :]
-
- def _joint_step(self, enc: torch.Tensor, pred: torch.Tensor, log_normalize: bool=False) -> torch.Tensor:
- logits = self._model.joint(enc, pred)[:, 0, 0, :]
- if not log_normalize:
- return logits
-
- probs = F.log_softmax(logits, dim=len(logits.shape) - 1)
-
- return probs
-
- def _get_last_symb(self, labels: List[int]) -> int:
- return self._SOS if len(labels) == 0 else labels[-1]
-
- @torch.jit.export
- def forward_enc_batch(self, x: torch.Tensor, out_lens: torch.Tensor, conf, int8, run_mode="inference") -> Tuple[torch.Tensor, torch.Tensor]:
- """Returns a list of sentences given an input batch.
-
- Args:
- x: A tensor of size (batch, channels, features, seq_len)
- TODO was (seq_len, batch, in_features).
- out_lens: list of int representing the length of each sequence
- output sequence.
-
- Returns:
- logits and logits lens
- """
- # Apply optional preprocessing
- # int8 encoder + bf16 decoder
- if int8:
- import intel_pytorch_extension as ipex
- with ipex.AutoMixPrecision(conf, running_mode=run_mode):
- logits, logits_lens = self._model.encoder(x, out_lens)
-
- # TODO: support reordering data directly from int8 to bf16
- # This is a workaround: transfer logits to the CPU
- # to reorder the data from int8 to fp32
- logits = logits.to("cpu")
- else:
- # the case of int8 = False and bf16 = True has already been handled at a higher level
- logits, logits_lens = self._model.encoder(x, out_lens)
-
- return logits, logits_lens
-
- @torch.jit.export
- def forward_dec_batch(self, logits: torch.Tensor, logits_lens: torch.Tensor, int8, bf16) -> List[List[int]]:
- """Returns a list of sentences given an input batch.
-
- Args:
- logits, logits_lens: encoder outputs (logits and their lengths)
-
- Returns:
- list containing batch number of sentences (strings).
- """
- # Apply optional preprocessing
- # int8 encoder + bf16 decoder
- import intel_pytorch_extension as ipex
- logits = logits.to(ipex.DEVICE)
- if int8:
- if bf16:
- # enable bf16 for decoder part
- ipex.enable_auto_mixed_precision(mixed_dtype=torch.bfloat16)
- else:
- # the case of int8 = False and bf16 = True has already been handled at a higher level
- pass
-
- sentences = self._greedy_decode_batch(logits, logits_lens)
-
- return sentences
-
- @torch.jit.export
- def forward_batch(self, x: torch.Tensor, out_lens: torch.Tensor, conf, int8, bf16, run_mode="inference") -> Tuple[torch.Tensor, torch.Tensor, List[List[int]], float, float]:
- """Returns a list of sentences given an input batch.
-
- Args:
- x: A tensor of size (batch, channels, features, seq_len)
- TODO was (seq_len, batch, in_features).
- out_lens: list of int representing the length of each sequence
- output sequence.
-
- Returns:
- list containing batch number of sentences (strings).
- """
- """
- # Apply optional preprocessing
- # int8 encoder + bf16 decoder
- t0 = time.time()
- if int8:
- import intel_pytorch_extension as ipex
- with ipex.AutoMixPrecision(conf, running_mode=run_mode):
- logits, logits_lens = self._model.encoder(x, out_lens)
-
- # TODO: support directly reorder data from int8 to bf16
- # This is an workaround here to transfer logits to cpu
- # to reorder data from int8 to fp32
- logits = logits.to("cpu")
- logits = logits.to(ipex.DEVICE)
-
- if bf16:
- # enable bf16 for decoder part
- ipex.enable_auto_mixed_precision(mixed_dtype=torch.bfloat16)
- else:
- # the case of int8 = False and bf16 = True had already processed in higher level
- logits, logits_lens = self._model.encoder(x, out_lens)
-
- t1 = time.time()
- sentences = self._greedy_decode_batch(logits, logits_lens)
- t2 = time.time()
-
- return logits, logits_lens, sentences, t1-t0, t2-t1
- """
- t0 = time.time()
- logits, logits_lens = self.forward_enc_batch(x, out_lens, conf, int8, run_mode)
- t1 = time.time()
- sentences = self.forward_dec_batch(logits, logits_lens, int8, bf16)
- t2 = time.time()
- return logits, logits_lens, sentences, t1-t0, t2-t1
-
- def count_nonzero(self, x: torch.Tensor) -> int:
- return x.nonzero().shape[0]
-
- def _greedy_decode_batch(self, x: torch.Tensor, out_lens: torch.Tensor) -> List[List[int]]:
- batch_size = x.size(0)
- hidden: Optional[Tuple[torch.Tensor, torch.Tensor]] = None
- max_len = out_lens.max().item()
- max_lens = torch.tensor([max_len-1] * batch_size, dtype=torch.int64)
- # pos 0 of label_tensor is set to _SOS to simplify computation
- # real label start from pos 1
- label_tensor = torch.tensor([self._SOS]).repeat(batch_size, max_len*self._max_symbols_per_step) # (B, T/2*max_symbols_per_step)
- # (row, col) of current labels end
- label_row = torch.tensor(list(range(batch_size)))
- label_col = torch.tensor([0] * batch_size)
- # this list will be used to return labels to caller
- label_copy = [0] * batch_size
- # initially time_idx is 0 for all input
- # then advance time_idx for each 'track' when needed and update f
- f = x[:, 0, :].unsqueeze(1)
- time_idxs = torch.tensor([0] * batch_size, dtype=torch.int64)
-
- not_blank = True
- blank_vec = torch.tensor([0] * batch_size, dtype=torch.int)
- symbols_added = torch.tensor([0] * batch_size, dtype=torch.int)
-
- while True:
- g, hidden_prime = self._pred_step_batch(
- label_tensor.gather(1, label_col.unsqueeze(1)),
- hidden,
- batch_size
- )
- logp = self._joint_step_batch(f, g, log_normalize=False)
-
- # get index k, of max prob
- v, k = logp.max(1)
-
- # if any output is blank, pull in the next time_idx for the next f
- # tmp_blank_vec is the vector used to mix the new hidden state with the previous hidden state
- # blank_vec is the baseline: a track turns blank for good only when it runs out of time steps
- blankness = k.eq(self._blank_id)
- time_idxs = time_idxs + blankness
- symbols_added *= blankness.logical_not()
- # it doesn't matter if blank_vec is updated now or later,
- # tmp_blank_vec always gets the correct value for this round
- blank_vec = time_idxs.ge(out_lens)
- tmp_blank_vec = blank_vec.logical_or(blankness)
-
- if self.count_nonzero(blank_vec) == batch_size:
- # all time_idxs processed, stop
- break
- else:
- # If a sample has already produced blank, stop updating its
- # hidden values until input from the next time step arrives.
- # So mix the values of hidden and hidden_prime together,
- # keeping the values in hidden where blank_vec[i] is true
- if hidden is None:
- hidden = [torch.zeros_like(hidden_prime[0]), torch.zeros_like(hidden_prime[1])]
-
- idx = (tmp_blank_vec.eq(0)).nonzero(as_tuple=True)[0]
- hidden[0][:, idx, :] = hidden_prime[0][:, idx, :]
- hidden[1][:, idx, :] = hidden_prime[1][:, idx, :]
-
- label_col += tmp_blank_vec.eq(False)
- label_tensor.index_put_([label_row, label_col], (k-self._SOS)*tmp_blank_vec.eq(False), accumulate=True)
-
- symbols_added += tmp_blank_vec.eq(False)
- sym_ge_vec = symbols_added.ge(self._max_symbols_per_step)
- if sym_ge_vec.count_nonzero() != 0:
- time_idxs += sym_ge_vec
- blankness = blankness.logical_or(sym_ge_vec)
- symbols_added *= symbols_added.lt(self._max_symbols_per_step)
-
- # update f if necessary
- # if at least one id in blankness is blank then time_idx is updated
- # and we need to update f accordingly
- if self.count_nonzero(blankness) > 0:
- fetch_time_idxs = time_idxs.min(max_lens)
- # select tensor along second dim of x
- # implement something like --> f = x[:, :, fetch_time_idxs, :]
- # for example, if all elements in fetch_time_idxs = n, then
- # this is equivalent to f = x[:, :, n, :]
- f = x[list(range(batch_size)), fetch_time_idxs, :].unsqueeze(1)
- for i in range(batch_size):
- label_copy[i]=label_tensor[i][1:label_col[i]+1].tolist()
- return label_copy
-
- def _pred_step_batch(self, label, hidden: Optional[Tuple[torch.Tensor, torch.Tensor]], batch_size) -> Tuple[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
- # this line is not really needed, _blank_id is the last id of the dict
- #label = label - label.gt(self._blank_id).int()
- result = self._model.prediction(label, hidden, batch_size)
- return result
-
- def _joint_step_batch(self, enc: torch.Tensor, pred: torch.Tensor, log_normalize: bool=False) -> torch.Tensor:
- logits = self._model.joint(enc, pred)
- logits = logits[:, 0, 0, :]
- if not log_normalize:
- return logits
-
- probs = F.log_softmax(logits, dim=len(logits.shape) - 1)
-
- return probs
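For reference, the core of the file above is the frame-synchronous greedy loop in _greedy_decode: run the prediction network on the last emitted symbol, join it with the current encoder frame, emit the argmax unless it is blank, and advance to the next frame on blank or when max_symbols_per_step is reached. The snippet below is a minimal self-contained sketch of that loop; DummyModel is a stand-in with the same call shapes, not the real RNN-T network.

```
import torch

class DummyModel:
    """Stand-in prediction/joint networks with the call shapes the decoder expects."""
    def __init__(self, vocab_size=5, hidden=8):
        self.vocab_size, self.hidden = vocab_size, hidden

    def prediction(self, label, hidden):
        # Real model: embeds the last symbol and runs the decoder LSTM; here random.
        g = torch.randn(1, 1, self.hidden)
        new_hidden = (torch.zeros(2, 1, self.hidden), torch.zeros(2, 1, self.hidden))
        return g, new_hidden

    def joint(self, enc, pred):
        # Real model: feed-forward over the combined states; returns (B, T, U, vocab).
        return torch.randn(1, 1, 1, self.vocab_size)

def greedy_decode(model, x, out_len, blank_id, max_symbols_per_step=30):
    """Greedy RNN-T decode of one utterance; x has shape (1, 1, T, F)."""
    hidden, label = None, []
    last_symb, time_idx, symb_added = -1, 0, 0   # -1 plays the _SOS role
    while True:
        g, hidden_prime = model.prediction(torch.tensor([[last_symb]]), hidden)
        logp = model.joint(x[:, :, time_idx, :], g)[0, 0, 0, :]
        k = int(logp.argmax())
        if k == blank_id or symb_added >= max_symbols_per_step:
            time_idx += 1                        # blank: move to the next encoder frame
            if time_idx >= out_len:
                break
            symb_added = 0
        else:
            label.append(k)                      # non-blank: emit and stay on the same frame
            last_symb, hidden = k, hidden_prime
            symb_added += 1
    return label

enc_out = torch.randn(1, 1, 10, 8)               # 10 toy encoder frames
print(greedy_decode(DummyModel(), enc_out, out_len=10, blank_id=4))
```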
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/helpers.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/helpers.py
deleted file mode 100644
index cfe3b66f3c8..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/helpers.py
+++ /dev/null
@@ -1,123 +0,0 @@
-# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
-# Copyright (c) 2019, Myrtle Software Limited. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from enum import Enum
-from metrics import word_error_rate
-
-
-class Optimization(Enum):
- """Various levels of Optimization.
- WARNING: This might affect model accuracy."""
- nothing = 0
- mxprO0 = 1
- mxprO1 = 2
- mxprO2 = 3
- mxprO3 = 4
-
-
-AmpOptimizations = {Optimization.mxprO0: "O0",
- Optimization.mxprO1: "O1",
- Optimization.mxprO2: "O2",
- Optimization.mxprO3: "O3"}
-
-
-def add_blank_label(labels):
- if not isinstance(labels, list):
- raise ValueError("labels must be a list of symbols")
- labels.append("")
- return labels
-
-
-def __rnnt_decoder_predictions_tensor(tensor, labels):
- """
- Takes output of greedy rnnt decoder and converts to strings.
- Args:
- tensor: model output tensor
- labels: A list of labels
- Returns:
- prediction
- """
- hypotheses = []
- labels_map = dict([(i, labels[i]) for i in range(len(labels))])
- # iterate over batch
- for ind in range(len(tensor)):
- hypothesis = ''.join([labels_map[c] for c in tensor[ind]])
- hypotheses.append(hypothesis)
- return hypotheses
-
-
-def __gather_predictions(predictions_list: list, labels: list) -> list:
- results = []
- for prediction in predictions_list:
- results += __rnnt_decoder_predictions_tensor(prediction, labels=labels)
- return results
-
-
-def __gather_transcripts(transcript_list: list, transcript_len_list: list,
- labels: list) -> list:
- results = []
- labels_map = dict([(i, labels[i]) for i in range(len(labels))])
- for i, t in enumerate(transcript_list):
- target = t.numpy().tolist()
- reference = ''.join([labels_map[c] for c in target])
- results.append(reference)
- return results
-
-
-def process_evaluation_batch(tensors: dict, global_vars: dict, labels: list):
- """
- Processes results of an iteration and saves them in global_vars
- Args:
- tensors: dictionary with results of an evaluation iteration, e.g. loss, predictions, transcript, and output
- global_vars: dictionary where processed results of the iteration are saved
- labels: A list of labels
- """
- for kv, v in tensors.items():
- if kv.startswith('predictions'):
- global_vars['predictions'] += __gather_predictions(
- v, labels=labels)
- elif kv.startswith('transcript_length'):
- transcript_len_list = v
- elif kv.startswith('transcript'):
- transcript_list = v
-
- global_vars['transcripts'] += __gather_transcripts(transcript_list,
- transcript_len_list,
- labels=labels)
-
-
-def process_evaluation_epoch(global_vars: dict, tag=None):
- """
- Processes results from each worker at the end of evaluation and combines them into the final result
- Args:
- global_vars: dictionary containing information about the entire evaluation
- Returns:
- wer: final word error rate
- """
- hypotheses = global_vars['predictions']
- references = global_vars['transcripts']
-
- wer, scores, num_words = word_error_rate(
- hypotheses=hypotheses, references=references)
- return wer
-
-
-def print_dict(d):
- maxLen = max([len(ii) for ii in d.keys()])
- fmtString = '\t%' + str(maxLen) + 's : %s'
- print('Arguments:')
- for keyPair in sorted(d.items()):
- print(fmtString % keyPair)
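The prediction gathering above reduces to an index-to-character join over the label table. A tiny standalone illustration (the label set here is made up for the example):

```
# Toy label set; in practice the labels come from the model configuration, and
# add_blank_label appends the trailing "" used as the blank symbol.
labels = [" ", "a", "c", "t", ""]
labels_map = {i: labels[i] for i in range(len(labels))}

decoded = [[2, 1, 3]]                                   # one utterance of label indices
hypotheses = ["".join(labels_map[c] for c in utt) for utt in decoded]
print(hypotheses)                                       # ['cat']
```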
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/metrics.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/metrics.py
deleted file mode 100644
index 5426e37237a..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/metrics.py
+++ /dev/null
@@ -1,67 +0,0 @@
-# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from typing import List, Tuple
-
-
-def __levenshtein(a: List, b: List) -> int:
- """Calculates the Levenshtein distance between a and b.
- """
- n, m = len(a), len(b)
- if n > m:
- # Make sure n <= m, to use O(min(n,m)) space
- a, b = b, a
- n, m = m, n
-
- current = list(range(n + 1))
- for i in range(1, m + 1):
- previous, current = current, [i] + [0] * n
- for j in range(1, n + 1):
- add, delete = previous[j] + 1, current[j - 1] + 1
- change = previous[j - 1]
- if a[j - 1] != b[i - 1]:
- change = change + 1
- current[j] = min(add, delete, change)
-
- return current[n]
-
-
-def word_error_rate(hypotheses: List[str], references: List[str]) -> Tuple[float, int, int]:
- """
- Computes the average word error rate between two texts represented as
- corresponding lists of strings. Hypotheses and references must have the same length.
-
- Args:
- hypotheses: list of hypotheses
- references: list of references
-
- Returns:
- tuple of (average word error rate, total edit distance, total number of reference words)
- """
- scores = 0
- words = 0
- if len(hypotheses) != len(references):
- raise ValueError("In word error rate calculation, hypotheses and reference"
- " lists must have the same number of elements. But I got:"
- "{0} and {1} correspondingly".format(len(hypotheses), len(references)))
- for h, r in zip(hypotheses, references):
- h_list = h.split()
- r_list = r.split()
- words += len(r_list)
- scores += __levenshtein(h_list, r_list)
- if words != 0:
- wer = (1.0 * scores) / words
- else:
- wer = float('inf')
- return wer, scores, words
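As a worked example of the metric above: the word-level edit distance between one hypothesis/reference pair divided by the reference word count. The helper below is an equivalent compact restatement for illustration, not the deleted __levenshtein:

```
def edit_distance(a, b):
    """Word-level Levenshtein distance (compact two-row DP)."""
    prev = list(range(len(b) + 1))
    for i, x in enumerate(a, 1):
        cur = [i]
        for j, y in enumerate(b, 1):
            cur.append(min(prev[j] + 1, cur[j - 1] + 1, prev[j - 1] + (x != y)))
        prev = cur
    return prev[-1]

hyp = "the cat sit on mat".split()
ref = "the cat sat on the mat".split()          # 6 reference words
print(edit_distance(hyp, ref) / len(ref))       # 2 edits / 6 words = 0.333...
```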
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/model_separable_rnnt.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/model_separable_rnnt.py
deleted file mode 100644
index f0ef252130c..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/model_separable_rnnt.py
+++ /dev/null
@@ -1,216 +0,0 @@
-from typing import Optional, Tuple
-
-import numpy as np
-import torch
-
-from rnn import rnn
-from rnn import StackTime
-
-
-class RNNT(torch.nn.Module):
- def __init__(self, rnnt=None, num_classes=1, **kwargs):
- super().__init__()
- if kwargs.get("no_featurizer", False):
- in_features = kwargs.get("in_features")
- else:
- feat_config = kwargs.get("feature_config")
- # This may be useful in the future, for MLPerf
- # configuration.
- in_features = feat_config['features'] * \
- feat_config.get("frame_splicing", 1)
-
- self.encoder = Encoder(in_features,
- rnnt["encoder_n_hidden"],
- rnnt["encoder_pre_rnn_layers"],
- rnnt["encoder_post_rnn_layers"],
- rnnt["forget_gate_bias"],
- None if "norm" not in rnnt else rnnt["norm"],
- rnnt["rnn_type"],
- rnnt["encoder_stack_time_factor"],
- rnnt["dropout"],
- )
-
- self.prediction = Prediction(
- num_classes,
- rnnt["pred_n_hidden"],
- rnnt["pred_rnn_layers"],
- rnnt["forget_gate_bias"],
- None if "norm" not in rnnt else rnnt["norm"],
- rnnt["rnn_type"],
- rnnt["dropout"],
- -1, #_SOS
- )
-
- self.joint = Joint(
- num_classes,
- rnnt["pred_n_hidden"],
- rnnt["encoder_n_hidden"],
- rnnt["joint_n_hidden"],
- rnnt["dropout"],
- )
-
- def forward(self, x_padded: torch.Tensor, x_lens: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
- return self.encoder(x_padded, x_lens)
-
-
-class Encoder(torch.nn.Module):
- def __init__(self, in_features, encoder_n_hidden,
- encoder_pre_rnn_layers, encoder_post_rnn_layers,
- forget_gate_bias, norm, rnn_type, encoder_stack_time_factor,
- dropout):
- super().__init__()
- self.pre_rnn = rnn(
- rnn=rnn_type,
- input_size=in_features,
- hidden_size=encoder_n_hidden,
- num_layers=encoder_pre_rnn_layers,
- norm=norm,
- forget_gate_bias=forget_gate_bias,
- dropout=dropout,
- )
- self.stack_time = StackTime(factor=encoder_stack_time_factor)
- self.post_rnn = rnn(
- rnn=rnn_type,
- input_size=encoder_stack_time_factor * encoder_n_hidden,
- hidden_size=encoder_n_hidden,
- num_layers=encoder_post_rnn_layers,
- norm=norm,
- forget_gate_bias=forget_gate_bias,
- norm_first_rnn=True,
- dropout=dropout,
- )
-
- def forward(self, x_padded: torch.Tensor, x_lens: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
- x_padded, _ = self.pre_rnn(x_padded, None)
- x_padded, x_lens = self.stack_time(x_padded, x_lens)
- # (T, B, H)
- x_padded, _ = self.post_rnn(x_padded, None)
- # (B, T, H)
- x_padded = x_padded.transpose_(0, 1)
- return x_padded, x_lens
-
-class Prediction(torch.nn.Module):
- def __init__(self, vocab_size, n_hidden, pred_rnn_layers,
- forget_gate_bias, norm, rnn_type, dropout, sos_val):
- super().__init__()
- self.embed = torch.nn.Embedding(vocab_size - 1, n_hidden)
- self.n_hidden = n_hidden
- self.dec_rnn = rnn(
- rnn=rnn_type,
- input_size=n_hidden,
- hidden_size=n_hidden,
- num_layers=pred_rnn_layers,
- norm=norm,
- forget_gate_bias=forget_gate_bias,
- dropout=dropout,
- )
- self._SOS = sos_val
-
- def forward(self, y: torch.Tensor,
- state: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
- b: int = 1) -> Tuple[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
- """
- B - batch size
- U - label length
- H - Hidden dimension size
- L - Number of decoder layers = 2
-
- Args:
- y: (B, U)
-
- Returns:
- Tuple (g, hid) where:
- g: (B, U + 1, H)
- hid: (h, c) where h is the final sequence hidden state and c is
- the final cell state:
- h (tensor), shape (L, B, H)
- c (tensor), shape (L, B, H)
- """
- # SOS hack: there is no SOS token, so SOS should behave as if its embedding were all zeros.
- # So identify SOS positions and fill the lookup result with 0.0.
- # If the embedding table contained an SOS token, this workaround
- # would not be needed.
- y_mask = y.eq(self._SOS)
- y.masked_fill_(y_mask, 0)
- y = self.embed(y)
- y.masked_fill_(y_mask.unsqueeze(2), 0.0)
-
- # if state is None:
- # batch = y.size(0)
- # state = [
- # (torch.zeros(batch, self.pred_n_hidden, dtype=y.dtype, device=y.device),
- # torch.zeros(batch, self.pred_n_hidden, dtype=y.dtype, device=y.device))
- # for _ in range(self.pred_rnn_layers)
- # ]
-
- y = y.transpose_(0, 1) # .contiguous() # (U + 1, B, H)
- g, hid = self.dec_rnn(y, state)
- g = g.transpose_(0, 1) # .contiguous() # (B, U + 1, H)
- # del y, state
- return g, hid
-
-class Joint(torch.nn.Module):
- def __init__(self, vocab_size, pred_n_hidden, enc_n_hidden,
- joint_n_hidden, dropout):
- super().__init__()
- layers = [
- torch.nn.Linear(pred_n_hidden + enc_n_hidden, joint_n_hidden),
- torch.nn.ReLU(),
- ] + ([torch.nn.Dropout(p=dropout), ] if dropout else []) + [
- torch.nn.Linear(joint_n_hidden, vocab_size)
- ]
- self.net = torch.nn.Sequential(
- *layers
- )
-
- def forward(self, f: torch.Tensor, g: torch.Tensor):
- """
- f should be shape (B, T, H)
- g should be shape (B, U + 1, H)
-
- returns:
- logits of shape (B, T, U, K + 1)
- """
- # Combine the input states and the output states
- B, T, H = f.shape
- B, U_, H2 = g.shape
-
- f = f.unsqueeze(dim=2) # (B, T, 1, H)
- f = f.expand((B, T, U_, H))
-
- g = g.unsqueeze(dim=1) # (B, 1, U + 1, H)
- g = g.expand((B, T, U_, H2))
-
- inp = torch.cat([f, g], dim=3) # (B, T, U, 2H)
- res = self.net(inp)
- # del f, g, inp
- return res
-
-def label_collate(labels):
- """Collates the label inputs for the rnn-t prediction network.
-
- If `labels` is already in torch.Tensor form this is a no-op.
-
- Args:
- labels: A torch.Tensor List of label indexes or a torch.Tensor.
-
- Returns:
- A padded torch.Tensor of shape (batch, max_seq_len).
- """
-
- if isinstance(labels, torch.Tensor):
- return labels.type(torch.int64)
- if not isinstance(labels, (list, tuple)):
- raise ValueError(
- f"`labels` should be a list or tensor not {type(labels)}"
- )
-
- batch_size = len(labels)
- max_len = max(len(l) for l in labels)
-
- cat_labels = np.full((batch_size, max_len), fill_value=0.0, dtype=np.int32)
- for e, l in enumerate(labels):
- cat_labels[e, :len(l)] = l
- labels = torch.LongTensor(cat_labels)
-
- return labels
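The Joint module above combines encoder frames (B, T, H) with prediction outputs (B, U+1, H) by broadcasting both to a shared (B, T, U+1, ·) shape and concatenating on the last dimension. A small shape check with toy dimensions:

```
import torch

B, T, U1, H = 2, 5, 3, 4                         # U1 = U + 1
f = torch.randn(B, T, H)                         # encoder output
g = torch.randn(B, U1, H)                        # prediction network output

f = f.unsqueeze(2).expand(B, T, U1, H)           # (B, T, U+1, H)
g = g.unsqueeze(1).expand(B, T, U1, H)           # (B, T, U+1, H)
joint_in = torch.cat([f, g], dim=3)              # (B, T, U+1, 2H) fed to the MLP
print(joint_in.shape)                            # torch.Size([2, 5, 3, 8])
```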
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/parts/features.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/parts/features.py
deleted file mode 100644
index 7b839dfa47a..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/parts/features.py
+++ /dev/null
@@ -1,260 +0,0 @@
-# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
-# Copyright (c) 2019, Myrtle Software Limited. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from typing import Tuple
-
-import torch
-import torch.nn as nn
-import math
-import librosa
-from .segment import AudioSegment
-
-
-class WaveformFeaturizer(object):
- def __init__(self, input_cfg):
- self.cfg = input_cfg
-
- def process(self, file_path, offset=0, duration=0, trim=False):
- audio = AudioSegment.from_file(file_path,
- target_sr=self.cfg['sample_rate'],
- int_values=self.cfg.get(
- 'int_values', False),
- offset=offset, duration=duration, trim=trim)
- return self.process_segment(audio)
-
- def process_segment(self, audio_segment):
- return torch.tensor(audio_segment.samples, dtype=torch.float)
-
- @classmethod
- def from_config(cls, input_config, perturbation_configs=None):
- return cls(input_config)
-
-
-constant = 1e-5
-
-
-def normalize_batch(x, seq_len, normalize_type):
- if normalize_type == "per_feature":
- x_mean = torch.zeros((seq_len.shape[0], x.shape[1]), dtype=x.dtype,
- device=x.device)
- x_std = torch.zeros((seq_len.shape[0], x.shape[1]), dtype=x.dtype,
- device=x.device)
- for i in range(x.shape[0]):
- x_mean[i, :] = x[i, :, :seq_len[i]].mean(dim=1)
- x_std[i, :] = x[i, :, :seq_len[i]].std(dim=1)
- # make sure x_std is not zero
- x_std += constant
- return (x - x_mean.unsqueeze(2)) / x_std.unsqueeze(2)
- elif normalize_type == "all_features":
- x_mean = torch.zeros(seq_len.shape, dtype=x.dtype, device=x.device)
- x_std = torch.zeros(seq_len.shape, dtype=x.dtype, device=x.device)
- for i in range(x.shape[0]):
- x_mean[i] = x[i, :, :seq_len[i].item()].mean()
- x_std[i] = x[i, :, :seq_len[i].item()].std()
- # make sure x_std is not zero
- x_std += constant
- return (x - x_mean.view(-1, 1, 1)) / x_std.view(-1, 1, 1)
- else:
- return x
-
-
-def splice_frames(x, frame_splicing):
- """ Stacks frames together across feature dim
-
- input is batch_size, feature_dim, num_frames
- output is batch_size, feature_dim*frame_splicing, num_frames
-
- """
- seq = [x]
- for n in range(1, frame_splicing):
- tmp = torch.zeros_like(x)
- tmp[:, :, :-n] = x[:, :, n:]
- seq.append(tmp)
- return torch.cat(seq, dim=1)[:, :, ::frame_splicing]
-
-
-class FilterbankFeatures(nn.Module):
- def __init__(self, sample_rate=8000, window_size=0.02, window_stride=0.01,
- window="hamming", normalize="per_feature", n_fft=None,
- preemph=0.97,
- nfilt=64, lowfreq=0, highfreq=None, log=True, dither=constant,
- pad_to=8,
- max_duration=16.7,
- frame_splicing=1):
- super(FilterbankFeatures, self).__init__()
-# print("PADDING: {}".format(pad_to))
-
- torch_windows = {
- 'hann': torch.hann_window,
- 'hamming': torch.hamming_window,
- 'blackman': torch.blackman_window,
- 'bartlett': torch.bartlett_window,
- 'none': None,
- }
-
- self.win_length = int(sample_rate * window_size) # frame size
- self.hop_length = int(sample_rate * window_stride)
- self.n_fft = n_fft or 2 ** math.ceil(math.log2(self.win_length))
-
- self.normalize = normalize
- self.log = log
- self.dither = dither
- self.frame_splicing = frame_splicing
- self.nfilt = nfilt
- self.preemph = preemph
- self.pad_to = pad_to
- # For now, always enable this.
- # See https://docs.google.com/presentation/d/1IVC3J-pHB-ipJpKsJox_SqmDHYdkIaoCXTbKmJmV2-I/edit?usp=sharing for elaboration
- self.use_deterministic_dithering = True
- highfreq = highfreq or sample_rate / 2
- window_fn = torch_windows.get(window, None)
- window_tensor = window_fn(self.win_length,
- periodic=False) if window_fn else None
- filterbanks = torch.tensor(
- librosa.filters.mel(sample_rate, self.n_fft, n_mels=nfilt, fmin=lowfreq,
- fmax=highfreq), dtype=torch.float).unsqueeze(0)
- # self.fb = filterbanks
- # self.window = window_tensor
- self.register_buffer("fb", filterbanks)
- self.register_buffer("window", window_tensor)
- # Calculate maximum sequence length (# frames)
- max_length = 1 + math.ceil(
- (max_duration * sample_rate - self.win_length) / self.hop_length
- )
- max_pad = 16 - (max_length % 16)
- self.max_length = max_length + max_pad
-
- def get_seq_len(self, seq_len):
- seq_len = (seq_len + self.hop_length - 1) // self.hop_length
- seq_len = (seq_len + self.frame_splicing - 1) // self.frame_splicing
- return seq_len
-
- @torch.no_grad()
- def forward(self, inp: Tuple[torch.Tensor, torch.Tensor]) -> torch.Tensor:
- x, seq_len = inp
-
- dtype = x.dtype
-
- seq_len = self.get_seq_len(seq_len)
-
- # dither
- if self.dither > 0 and not self.use_deterministic_dithering:
- x += self.dither * torch.randn_like(x)
-
- # do preemphasis
- # Ideally, we would mask immediately after this... Ugh :(
- if self.preemph is not None:
- x = torch.cat((x[:, 0].unsqueeze(1), x[:, 1:] - self.preemph * x[:, :-1]),
- dim=1)
-
- # do stft
- x = torch.stft(x, n_fft=self.n_fft, hop_length=self.hop_length,
- win_length=self.win_length,
- center=True, window=self.window.to(dtype=torch.float),
- return_complex=False)
-
- # get power spectrum
- x = x.pow(2).sum(-1)
-
- if self.dither > 0 and self.use_deterministic_dithering:
- x = x + self.dither ** 2
- # dot with filterbank energies
- x = torch.matmul(self.fb.to(x.dtype), x)
-
- # log features if required
- if self.log:
- x = torch.log(x + 1e-20)
-
- # frame splicing if required
- if self.frame_splicing > 1:
- seq = [x]
- for n in range(1, self.frame_splicing):
- tmp = torch.zeros_like(x)
- tmp[:, :, :-n] = x[:, :, n:]
- seq.append(tmp)
- x = torch.cat(seq, dim=1)[:, :, ::self.frame_splicing]
-
- # normalize if required
- constant = 1e-5
- if self.normalize == "per_feature":
- x_mean = torch.zeros((seq_len.shape[0], x.shape[1]), dtype=x.dtype,
- device=x.device)
- x_std = torch.zeros((seq_len.shape[0], x.shape[1]), dtype=x.dtype,
- device=x.device)
- for i in range(x.shape[0]):
- x_mean[i, :] = x[i, :, :seq_len[i]].mean(dim=1)
- x_std[i, :] = x[i, :, :seq_len[i]].std(dim=1)
- # make sure x_std is not zero
- x_std += constant
- x = (x - x_mean.unsqueeze(2)) / x_std.unsqueeze(2)
- elif self.normalize == "all_features":
- x_mean = torch.zeros(seq_len.shape, dtype=x.dtype, device=x.device)
- x_std = torch.zeros(seq_len.shape, dtype=x.dtype, device=x.device)
- for i in range(x.shape[0]):
- x_mean[i] = x[i, :, :seq_len[i].item()].mean()
- x_std[i] = x[i, :, :seq_len[i].item()].std()
- # make sure x_std is not zero
- x_std += constant
- x = (x - x_mean.view(-1, 1, 1)) / x_std.view(-1, 1, 1)
- else:
- x = x
-
- # Hmmm... They don't do any masking anymore. Seems concerning!
-
- # mask to zero any values beyond seq_len in batch, pad to multiple of `pad_to` (for efficiency)
- # max_len = x.size(-1)
- x = x[:, :, :seq_len.max()] # rnnt loss requires lengths to match
- # mask = torch.arange(max_len).to(seq_len.dtype).to(x.device).expand(x.size(0),
- # max_len) >= seq_len.unsqueeze(1)
-
- # x = x.masked_fill(mask.unsqueeze(1).to(device=x.device), 0)
- pad_to = self.pad_to
- if pad_to != 0:
- raise NotImplementedError()
- # if pad_to == "max":
- # x = nn.functional.pad(x, (0, self.max_length - x.size(-1)))
- # elif pad_to > 0:
- # pad_amt = x.size(-1) % pad_to
- # if pad_amt != 0:
- # x = nn.functional.pad(x, (0, pad_to - pad_amt))
-
- return x.to(dtype)
-
- @classmethod
- def from_config(cls, cfg, log=False):
- return cls(sample_rate=cfg['sample_rate'], window_size=cfg['window_size'],
- window_stride=cfg['window_stride'], n_fft=cfg['n_fft'],
- nfilt=cfg['features'], window=cfg['window'],
- normalize=cfg['normalize'],
- max_duration=cfg.get('max_duration', 16.7),
- dither=cfg['dither'], pad_to=cfg.get("pad_to", 0),
- frame_splicing=cfg.get("frame_splicing", 1), log=log)
-
-
-class FeatureFactory(object):
- featurizers = {
- "logfbank": FilterbankFeatures,
- "fbank": FilterbankFeatures,
- }
-
- def __init__(self):
- pass
-
- @classmethod
- def from_config(cls, cfg):
- feat_type = cfg.get('feat_type', "logspect")
- featurizer = cls.featurizers[feat_type]
- # return featurizer.from_config(cfg, log="log" in cfg['feat_type'])
- return featurizer.from_config(cfg, log="log" in feat_type)
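The framing parameters in FilterbankFeatures follow directly from the sample rate and window settings, and get_seq_len is a ceiling division by the hop length. A quick numeric check with a typical 16 kHz / 20 ms / 10 ms configuration (assumed values for illustration):

```
import math

sample_rate, window_size, window_stride = 16000, 0.02, 0.01   # assumed config values
win_length = int(sample_rate * window_size)                    # 320 samples per frame
hop_length = int(sample_rate * window_stride)                  # 160 samples between frames
n_fft = 2 ** math.ceil(math.log2(win_length))                  # 512, next power of two

num_samples = 16000                                            # a 1-second clip
num_frames = (num_samples + hop_length - 1) // hop_length      # get_seq_len with splicing=1
print(win_length, hop_length, n_fft, num_frames)               # 320 160 512 100
```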
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/parts/manifest.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/parts/manifest.py
deleted file mode 100644
index fb04c5da882..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/parts/manifest.py
+++ /dev/null
@@ -1,176 +0,0 @@
-# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import json
-import string
-import os
-
-from .text import _clean_text
-
-
-def normalize_string(s, labels, table, **unused_kwargs):
- """
- Normalizes string. For example:
- 'call me at 8:00 pm!' -> 'call me at eight zero pm'
-
- Args:
- s: string to normalize
- labels: labels used during model training.
-
- Returns:
- Normalized string
- """
-
- def good_token(token, labels):
- s = set(labels)
- for t in token:
- if t not in s:
- return False
- return True
-
- try:
- text = _clean_text(s, ["english_cleaners"], table).strip()
- return ''.join([t for t in text if good_token(t, labels=labels)])
- except Exception:
- print("WARNING: Normalizing {} failed".format(s))
- return None
-
-
-class Manifest(object):
- def __init__(self, data_dir, manifest_paths, labels, blank_index, max_duration=None, pad_to_max=False,
- min_duration=None, sort_by_duration=False, max_utts=0,
- normalize=True, speed_perturbation=False, filter_speed=1.0):
- self.labels_map = dict([(labels[i], i) for i in range(len(labels))])
- self.blank_index = blank_index
- self.max_duration = max_duration
- ids = []
- duration = 0.0
- filtered_duration = 0.0
-
- # If removing punctuation, make a list of punctuation to remove
- table = None
- if normalize:
- # Punctuation to remove
- punctuation = string.punctuation
- punctuation = punctuation.replace("+", "")
- punctuation = punctuation.replace("&", "")
- # We might also want to consider:
- # @ -> at
- # '#' -> number, pound, hashtag
- # ~ -> tilde
- # _ -> underscore
- # % -> percent
- # If a punctuation symbol is inside our vocab, we do not remove from text
- for l in labels:
- punctuation = punctuation.replace(l, "")
- # Turn all punctuation to whitespace
- table = str.maketrans(punctuation, " " * len(punctuation))
- for manifest_path in manifest_paths:
- with open(manifest_path, "r", encoding="utf-8") as fh:
- a = json.load(fh)
- for data in a:
- files_and_speeds = data['files']
-
- if pad_to_max:
- if not speed_perturbation:
- min_speed = filter_speed
- else:
- min_speed = min(x['speed']
- for x in files_and_speeds)
- max_duration = self.max_duration * min_speed
-
- data['duration'] = data['original_duration']
- if min_duration is not None and data['duration'] < min_duration:
- filtered_duration += data['duration']
- continue
- if max_duration is not None and data['duration'] > max_duration:
- filtered_duration += data['duration']
- continue
-
- # Prune and normalize according to transcript
- transcript_text = data[
- 'transcript'] if "transcript" in data else self.load_transcript(
- data['text_filepath'])
- if normalize:
- transcript_text = normalize_string(transcript_text, labels=labels,
- table=table)
- if not isinstance(transcript_text, str):
- print(
- "WARNING: Got transcript: {}. It is not a string. Dropping data point".format(
- transcript_text))
- filtered_duration += data['duration']
- continue
- data["transcript"] = self.parse_transcript(
- transcript_text) # convert to vocab indices
-
- if speed_perturbation:
- audio_paths = [x['fname'] for x in files_and_speeds]
- data['audio_duration'] = [x['duration']
- for x in files_and_speeds]
- else:
- audio_paths = [
- x['fname'] for x in files_and_speeds if x['speed'] == filter_speed]
- data['audio_duration'] = [x['duration']
- for x in files_and_speeds if x['speed'] == filter_speed]
- data['audio_filepath'] = [os.path.join(
- data_dir, x) for x in audio_paths]
- data.pop('files')
- data.pop('original_duration')
-
- ids.append(data)
- duration += data['duration']
-
- if max_utts > 0 and len(ids) >= max_utts:
- print(
- 'Stopping parsing %s as max_utts=%d' % (manifest_path, max_utts))
- break
-
- if sort_by_duration:
- ids = sorted(ids, key=lambda x: x['duration'])
- self._data = ids
- self._size = len(ids)
- self._duration = duration
- self._filtered_duration = filtered_duration
-
- def load_transcript(self, transcript_path):
- with open(transcript_path, 'r', encoding="utf-8") as transcript_file:
- transcript = transcript_file.read().replace('\n', '')
- return transcript
-
- def parse_transcript(self, transcript):
- chars = [self.labels_map.get(x, self.blank_index)
- for x in list(transcript)]
- transcript = list(filter(lambda x: x != self.blank_index, chars))
- return transcript
-
- def __getitem__(self, item):
- return self._data[item]
-
- def __len__(self):
- return self._size
-
- def __iter__(self):
- return iter(self._data)
-
- @property
- def duration(self):
- return self._duration
-
- @property
- def filtered_duration(self):
- return self._filtered_duration
-
- @property
- def data(self):
- return list(self._data)
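The normalization path above builds a translation table that maps every punctuation character not present in the vocabulary to a space. A short standalone illustration (the label set is an assumption):

```
import string

labels = list(" abcdefghijklmnopqrstuvwxyz'")       # assumed vocabulary with apostrophe
punctuation = string.punctuation.replace("+", "").replace("&", "")
for l in labels:
    punctuation = punctuation.replace(l, "")        # keep symbols the model can emit
table = str.maketrans(punctuation, " " * len(punctuation))

print("hello, world! it's fine".translate(table))   # hello  world  it's fine
```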
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/parts/segment.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/parts/segment.py
deleted file mode 100644
index 08aa5c6a492..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/parts/segment.py
+++ /dev/null
@@ -1,170 +0,0 @@
-# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import numpy as np
-import librosa
-import soundfile as sf
-
-
-class AudioSegment(object):
- """Monaural audio segment abstraction.
- :param samples: Audio samples [num_samples x num_channels].
- :type samples: ndarray.float32
- :param sample_rate: Audio sample rate.
- :type sample_rate: int
- :raises TypeError: If the sample data type is not float or int.
- """
-
- def __init__(self, samples, sample_rate, target_sr=None, trim=False,
- trim_db=60):
- """Create audio segment from samples.
- Samples are converted to float32 internally, with integers scaled to [-1, 1].
- """
- samples = self._convert_samples_to_float32(samples)
- if target_sr is not None and target_sr != sample_rate:
- samples = librosa.core.resample(samples, sample_rate, target_sr)
- sample_rate = target_sr
- if trim:
- samples, _ = librosa.effects.trim(samples, trim_db)
- self._samples = samples
- self._sample_rate = sample_rate
- if self._samples.ndim >= 2:
- self._samples = np.mean(self._samples, 1)
-
- def __eq__(self, other):
- """Return whether two objects are equal."""
- if type(other) is not type(self):
- return False
- if self._sample_rate != other._sample_rate:
- return False
- if self._samples.shape != other._samples.shape:
- return False
- if np.any(self.samples != other._samples):
- return False
- return True
-
- def __ne__(self, other):
- """Return whether two objects are unequal."""
- return not self.__eq__(other)
-
- def __str__(self):
- """Return human-readable representation of segment."""
- return ("%s: num_samples=%d, sample_rate=%d, duration=%.2fsec, "
- "rms=%.2fdB" % (type(self), self.num_samples, self.sample_rate,
- self.duration, self.rms_db))
-
- @staticmethod
- def _convert_samples_to_float32(samples):
- """Convert sample type to float32.
- Audio sample type is usually integer or floating point.
- Integers will be scaled to [-1, 1] in float32.
- """
- float32_samples = samples.astype('float32')
- if samples.dtype in np.sctypes['int']:
- bits = np.iinfo(samples.dtype).bits
- float32_samples *= (1. / 2 ** (bits - 1))
- elif samples.dtype in np.sctypes['float']:
- pass
- else:
- raise TypeError("Unsupported sample type: %s." % samples.dtype)
- return float32_samples
-
- @classmethod
- def from_file(cls, filename, target_sr=None, int_values=False, offset=0,
- duration=0, trim=False):
- """
- Load a file supported by librosa and return as an AudioSegment.
- :param filename: path of file to load
- :param target_sr: the desired sample rate
- :param int_values: if true, load samples as 32-bit integers
- :param offset: offset in seconds when loading audio
- :param duration: duration in seconds when loading audio
- :return: AudioSegment containing the loaded samples
- """
- with sf.SoundFile(filename, 'r') as f:
- dtype = 'int32' if int_values else 'float32'
- sample_rate = f.samplerate
- if offset > 0:
- f.seek(int(offset * sample_rate))
- if duration > 0:
- samples = f.read(int(duration * sample_rate), dtype=dtype)
- else:
- samples = f.read(dtype=dtype)
- samples = samples.transpose()
- return cls(samples, sample_rate, target_sr=target_sr, trim=trim)
-
- @property
- def samples(self):
- return self._samples.copy()
-
- @property
- def sample_rate(self):
- return self._sample_rate
-
- @property
- def num_samples(self):
- return self._samples.shape[0]
-
- @property
- def duration(self):
- return self._samples.shape[0] / float(self._sample_rate)
-
- @property
- def rms_db(self):
- mean_square = np.mean(self._samples ** 2)
- return 10 * np.log10(mean_square)
-
- def gain_db(self, gain):
- self._samples *= 10. ** (gain / 20.)
-
- def pad(self, pad_size, symmetric=False):
- """Add zero padding to the sample. The pad size is given in number of samples.
- If symmetric=True, `pad_size` will be added to both sides. If false, `pad_size`
- zeros will be added only to the end.
- """
- self._samples = np.pad(self._samples,
- (pad_size if symmetric else 0, pad_size),
- mode='constant')
-
- def subsegment(self, start_time=None, end_time=None):
- """Cut the AudioSegment between given boundaries.
- Note that this is an in-place transformation.
- :param start_time: Beginning of subsegment in seconds.
- :type start_time: float
- :param end_time: End of subsegment in seconds.
- :type end_time: float
- :raise ValueError: If start_time or end_time is incorrectly set, e.g. out
- of bounds in time.
- """
- start_time = 0.0 if start_time is None else start_time
- end_time = self.duration if end_time is None else end_time
- if start_time < 0.0:
- start_time = self.duration + start_time
- if end_time < 0.0:
- end_time = self.duration + end_time
- if start_time < 0.0:
- raise ValueError("The slice start position (%f s) is out of "
- "bounds." % start_time)
- if end_time < 0.0:
- raise ValueError("The slice end position (%f s) is out of bounds." %
- end_time)
- if start_time > end_time:
- raise ValueError("The slice start position (%f s) is later than "
- "the end position (%f s)." % (start_time, end_time))
- if end_time > self.duration:
- raise ValueError("The slice end position (%f s) is out of bounds "
- "(> %f s)" % (end_time, self.duration))
- start_sample = int(round(start_time * self._sample_rate))
- end_sample = int(round(end_time * self._sample_rate))
- self._samples = self._samples[start_sample:end_sample]
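The float32 conversion above scales integer PCM by 1 / 2**(bits - 1). A small standalone check with 16-bit samples:

```
import numpy as np

int16_samples = np.array([0, 16384, -32768], dtype=np.int16)
bits = np.iinfo(int16_samples.dtype).bits                      # 16
float_samples = int16_samples.astype("float32") * (1.0 / 2 ** (bits - 1))
print(float_samples)                                           # [ 0.   0.5 -1. ]
```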
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/parts/text/LICENSE b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/parts/text/LICENSE
deleted file mode 100644
index 4ad4ed1d5e3..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/parts/text/LICENSE
+++ /dev/null
@@ -1,19 +0,0 @@
-Copyright (c) 2017 Keith Ito
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/parts/text/__init__.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/parts/text/__init__.py
deleted file mode 100644
index 61936879a95..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/parts/text/__init__.py
+++ /dev/null
@@ -1,12 +0,0 @@
-# Copyright (c) 2017 Keith Ito
-""" from https://github.com/keithito/tacotron """
-from . import cleaners
-
-
-def _clean_text(text, cleaner_names, *args):
- for name in cleaner_names:
- cleaner = getattr(cleaners, name)
- if not cleaner:
- raise Exception('Unknown cleaner: %s' % name)
- text = cleaner(text, *args)
- return text
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/parts/text/cleaners.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/parts/text/cleaners.py
deleted file mode 100644
index e1e52af5f37..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/parts/text/cleaners.py
+++ /dev/null
@@ -1,116 +0,0 @@
-# Copyright (c) 2017 Keith Ito
-# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-""" from https://github.com/keithito/tacotron
-Modified to add punctuation removal
-"""
-
-'''
-Cleaners are transformations that run over the input text at both training and eval time.
-
-Cleaners can be selected by passing a comma-delimited list of cleaner names as the "cleaners"
-hyperparameter. Some cleaners are English-specific. You'll typically want to use:
- 1. "english_cleaners" for English text
- 2. "transliteration_cleaners" for non-English text that can be transliterated to ASCII using
- the Unidecode library (https://pypi.python.org/pypi/Unidecode)
- 3. "basic_cleaners" if you do not want to transliterate (in this case, you should also update
- the symbols in symbols.py to match your data).
-
-'''
-
-
-# Regular expression matching whitespace:
-import re
-from text_unidecode import unidecode
-from .numbers import normalize_numbers
-_whitespace_re = re.compile(r'\s+')
-
-# List of (regular expression, replacement) pairs for abbreviations:
-_abbreviations = [(re.compile('\\b%s\\.' % x[0], re.IGNORECASE), x[1]) for x in [
- ('mrs', 'misess'),
- ('mr', 'mister'),
- ('dr', 'doctor'),
- ('st', 'saint'),
- ('co', 'company'),
- ('jr', 'junior'),
- ('maj', 'major'),
- ('gen', 'general'),
- ('drs', 'doctors'),
- ('rev', 'reverend'),
- ('lt', 'lieutenant'),
- ('hon', 'honorable'),
- ('sgt', 'sergeant'),
- ('capt', 'captain'),
- ('esq', 'esquire'),
- ('ltd', 'limited'),
- ('col', 'colonel'),
- ('ft', 'fort'),
-]]
-
-
-def expand_abbreviations(text):
- for regex, replacement in _abbreviations:
- text = re.sub(regex, replacement, text)
- return text
-
-
-def expand_numbers(text):
- return normalize_numbers(text)
-
-
-def lowercase(text):
- return text.lower()
-
-
-def collapse_whitespace(text):
- return re.sub(_whitespace_re, ' ', text)
-
-
-def convert_to_ascii(text):
- return unidecode(text)
-
-
-def remove_punctuation(text, table):
- text = text.translate(table)
- text = re.sub(r'&', " and ", text)
- text = re.sub(r'\+', " plus ", text)
- return text
-
-
-def basic_cleaners(text):
- '''Basic pipeline that lowercases and collapses whitespace without transliteration.'''
- text = lowercase(text)
- text = collapse_whitespace(text)
- return text
-
-
-def transliteration_cleaners(text):
- '''Pipeline for non-English text that transliterates to ASCII.'''
- text = convert_to_ascii(text)
- text = lowercase(text)
- text = collapse_whitespace(text)
- return text
-
-
-def english_cleaners(text, table=None):
- '''Pipeline for English text, including number and abbreviation expansion.'''
- text = convert_to_ascii(text)
- text = lowercase(text)
- text = expand_numbers(text)
- text = expand_abbreviations(text)
- if table is not None:
- text = remove_punctuation(text, table)
- text = collapse_whitespace(text)
- return text
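The abbreviation pass above is a plain list of regex substitutions applied in order. A trimmed-down standalone demonstration with two of the entries:

```
import re

# Two entries from the table above, applied the same way expand_abbreviations does.
abbreviations = [(re.compile(r"\b%s\." % pat, re.IGNORECASE), rep)
                 for pat, rep in [("dr", "doctor"), ("mr", "mister")]]

text = "dr. smith met mr. jones"
for regex, replacement in abbreviations:
    text = re.sub(regex, replacement, text)
print(text)                                  # doctor smith met mister jones
```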
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/parts/text/numbers.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/parts/text/numbers.py
deleted file mode 100644
index 3d2f77121c8..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/parts/text/numbers.py
+++ /dev/null
@@ -1,101 +0,0 @@
-# Copyright (c) 2017 Keith Ito
-# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-""" from https://github.com/keithito/tacotron
-Modified to add support for time and slight tweaks to _expand_number
-"""
-
-import inflect
-import re
-
-
-_inflect = inflect.engine()
-_comma_number_re = re.compile(r'([0-9][0-9\,]+[0-9])')
-_decimal_number_re = re.compile(r'([0-9]+\.[0-9]+)')
-_pounds_re = re.compile(r'£([0-9\,]*[0-9]+)')
-_dollars_re = re.compile(r'\$([0-9\.\,]*[0-9]+)')
-_ordinal_re = re.compile(r'[0-9]+(st|nd|rd|th)')
-_number_re = re.compile(r'[0-9]+')
-_time_re = re.compile(r'([0-9]{1,2}):([0-9]{2})')
-
-
-def _remove_commas(m):
- return m.group(1).replace(',', '')
-
-
-def _expand_decimal_point(m):
- return m.group(1).replace('.', ' point ')
-
-
-def _expand_dollars(m):
- match = m.group(1)
- parts = match.split('.')
- if len(parts) > 2:
- return match + ' dollars' # Unexpected format
- dollars = int(parts[0]) if parts[0] else 0
- cents = int(parts[1]) if len(parts) > 1 and parts[1] else 0
- if dollars and cents:
- dollar_unit = 'dollar' if dollars == 1 else 'dollars'
- cent_unit = 'cent' if cents == 1 else 'cents'
- return '%s %s, %s %s' % (dollars, dollar_unit, cents, cent_unit)
- elif dollars:
- dollar_unit = 'dollar' if dollars == 1 else 'dollars'
- return '%s %s' % (dollars, dollar_unit)
- elif cents:
- cent_unit = 'cent' if cents == 1 else 'cents'
- return '%s %s' % (cents, cent_unit)
- else:
- return 'zero dollars'
-
-
-def _expand_ordinal(m):
- return _inflect.number_to_words(m.group(0))
-
-
-def _expand_number(m):
- if int(m.group(0)[0]) == 0:
- return _inflect.number_to_words(m.group(0), andword='', group=1)
- num = int(m.group(0))
- if num > 1000 and num < 3000:
- if num == 2000:
- return 'two thousand'
- elif num > 2000 and num < 2010:
- return 'two thousand ' + _inflect.number_to_words(num % 100)
- elif num % 100 == 0:
- return _inflect.number_to_words(num // 100) + ' hundred'
- else:
- return _inflect.number_to_words(num, andword='', zero='oh', group=2).replace(', ', ' ')
- # Add check for number phones and other large numbers
- elif num > 1000000000 and num % 10000 != 0:
- return _inflect.number_to_words(num, andword='', group=1)
- else:
- return _inflect.number_to_words(num, andword='')
-
-
-def _expand_time(m):
- mins = int(m.group(2))
- if mins == 0:
- return _inflect.number_to_words(m.group(1))
- return " ".join([_inflect.number_to_words(m.group(1)), _inflect.number_to_words(m.group(2))])
-
-
-def normalize_numbers(text):
- text = re.sub(_comma_number_re, _remove_commas, text)
- text = re.sub(_pounds_re, r'\1 pounds', text)
- text = re.sub(_dollars_re, _expand_dollars, text)
- text = re.sub(_decimal_number_re, _expand_decimal_point, text)
- text = re.sub(_ordinal_re, _expand_ordinal, text)
- text = re.sub(_number_re, _expand_number, text)
- text = re.sub(_time_re, _expand_time, text)
- return text
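The deleted `normalize_numbers` applies its substitutions in a fixed order: commas, pounds, dollars, decimals, ordinals, plain numbers, and finally times. A minimal standalone sketch of the dollar-then-digits path (it assumes the `inflect` package is installed and skips the singular/zero handling of `_expand_dollars`):

```
import re
import inflect

_engine = inflect.engine()

def expand_dollars_then_numbers(text):
    # Mirror the substitution order used above: "$3.50" first becomes
    # "3 dollars, 50 cents", then any remaining digits are spelled out.
    text = re.sub(r'\$([0-9]+)\.([0-9]+)',
                  lambda m: '{} dollars, {} cents'.format(m.group(1), m.group(2)),
                  text)
    text = re.sub(r'[0-9]+',
                  lambda m: _engine.number_to_words(int(m.group(0)), andword=''),
                  text)
    return text

print(expand_dollars_then_numbers("I paid $3.50 for 2 books"))
# -> I paid three dollars, fifty cents for two books
```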
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/preprocessing.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/preprocessing.py
deleted file mode 100644
index 581885466b0..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/preprocessing.py
+++ /dev/null
@@ -1,39 +0,0 @@
-# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from typing import Tuple
-
-import torch
-import torch.nn as nn
-
-from helpers import Optimization
-from parts.features import FeatureFactory
-
-
-class AudioPreprocessing(nn.Module):
- """GPU accelerated audio preprocessing
- """
-
- def __init__(self, **kwargs):
- nn.Module.__init__(self) # For PyTorch API
- self.optim_level = kwargs.get(
- 'optimization_level', Optimization.nothing)
- self.featurizer = FeatureFactory.from_config(kwargs)
-
- def forward(self, x: Tuple[torch.Tensor, torch.Tensor]) -> Tuple[torch.Tensor, torch.Tensor]:
- input_signal, length = x
- length.requires_grad_(False)
- processed_signal = self.featurizer(x)
- processed_length = self.featurizer.get_seq_len(length)
- return processed_signal, processed_length
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/rnn.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/rnn.py
deleted file mode 100644
index 9bbea9c0a67..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/rnn.py
+++ /dev/null
@@ -1,107 +0,0 @@
-# Copyright (c) 2019, Myrtle Software Limited. All rights reserved.
-# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import torch
-
-from typing import Optional, Tuple
-
-
-def rnn(rnn, input_size, hidden_size, num_layers, norm=None,
- forget_gate_bias=1.0, dropout=0.0, **kwargs):
- """TODO"""
- if rnn != "lstm":
- raise ValueError(f"Unknown rnn={rnn}")
- if norm not in [None]:
- raise ValueError(f"unknown norm={norm}")
-
- if rnn == "lstm":
- return LstmDrop(
- input_size=input_size,
- hidden_size=hidden_size,
- num_layers=num_layers,
- dropout=dropout,
- forget_gate_bias=forget_gate_bias,
- **kwargs
- )
-
-
-class LstmDrop(torch.nn.Module):
-
- def __init__(self, input_size, hidden_size, num_layers, dropout, forget_gate_bias,
- **kwargs):
- """Returns an LSTM with forget gate bias init to `forget_gate_bias`.
-
- Args:
- input_size: See `torch.nn.LSTM`.
- hidden_size: See `torch.nn.LSTM`.
- num_layers: See `torch.nn.LSTM`.
- dropout: See `torch.nn.LSTM`.
-            forget_gate_bias: For each layer and each direction, the value to
-                initialise the forget gate bias to.
-
- Returns:
- A `torch.nn.LSTM`.
- """
- super(LstmDrop, self).__init__()
-
- self.lstm = torch.nn.LSTM(
- input_size=input_size,
- hidden_size=hidden_size,
- num_layers=num_layers,
- dropout=dropout,
- )
- if forget_gate_bias is not None:
- for name, v in self.lstm.named_parameters():
- if "bias_ih" in name:
- bias = getattr(self.lstm, name)
- bias.data[hidden_size:2 * hidden_size].fill_(forget_gate_bias)
- if "bias_hh" in name:
- bias = getattr(self.lstm, name)
- bias.data[hidden_size:2 * hidden_size].fill_(0)
-
- if dropout:
- self.inplace_dropout = torch.nn.Dropout(dropout, inplace=True)
- else:
-            self.inplace_dropout = None
-
- def forward(self, x: torch.Tensor,
- h: Optional[Tuple[torch.Tensor, torch.Tensor]] = None):
- x, h = self.lstm(x, h)
-
- if self.inplace_dropout is not None:
- self.inplace_dropout(x.data)
-
- return x, h
-
-
-class StackTime(torch.nn.Module):
-
- __constants__ = ["factor"]
-
- def __init__(self, factor):
- super().__init__()
- self.factor = int(factor)
-
- def forward(self, x, x_lens):
- # T, B, U
- seq = [x]
- for i in range(1, self.factor):
- # This doesn't seem to make much sense...
- tmp = torch.zeros_like(x)
- tmp[:-i, :, :] = x[i:, :, :]
- seq.append(tmp)
- x_lens = torch.ceil(x_lens.float() / self.factor).int()
- # Gross, this is horrible. What a waste of memory...
- return torch.cat(seq, dim=2)[::self.factor, :, :], x_lens
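Two details of the file above are worth spelling out: PyTorch packs the 4*hidden_size LSTM bias as (input, forget, cell, output) gates, so the slice `[hidden_size:2*hidden_size]` is exactly the forget gate, and `StackTime` concatenates `factor` consecutive frames along the feature axis while subsampling time. A small sketch with toy sizes (chosen only for illustration):

```
import torch

hidden_size = 4
lstm = torch.nn.LSTM(input_size=3, hidden_size=hidden_size, num_layers=1)

# Re-create the forget-gate bias init from LstmDrop: the second quarter of
# each bias vector is the forget gate in PyTorch's (i, f, g, o) layout.
for name, _ in lstm.named_parameters():
    if "bias_ih" in name:
        getattr(lstm, name).data[hidden_size:2 * hidden_size].fill_(1.0)
    if "bias_hh" in name:
        getattr(lstm, name).data[hidden_size:2 * hidden_size].fill_(0.0)
assert torch.all(lstm.bias_ih_l0[hidden_size:2 * hidden_size] == 1.0)

# StackTime with factor=2 on a (T, B, F) tensor: time is subsampled by 2,
# features are doubled, and the shifted copy is zero-padded at the end.
T, B, F = 6, 1, 3
x = torch.randn(T, B, F)
shifted = torch.zeros_like(x)
shifted[:-1] = x[1:]
stacked = torch.cat([x, shifted], dim=2)[::2]
print(stacked.shape)  # torch.Size([3, 1, 6])
```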
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/docker/build.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/docker/build.sh
deleted file mode 100644
index cfdc97c010e..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/docker/build.sh
+++ /dev/null
@@ -1,3 +0,0 @@
-#!/bin/bash
-
-docker build . --rm -t jasper
\ No newline at end of file
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/docker/launch.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/docker/launch.sh
deleted file mode 100644
index 5c9c6a3f346..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/docker/launch.sh
+++ /dev/null
@@ -1,32 +0,0 @@
-# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-#!/bin/bash
-
-DATA_DIR=$1
-CHECKPOINT_DIR=$2
-RESULT_DIR=$3
-
-docker run -it --rm \
- --gpus='"device=1"' \
- --shm-size=4g \
- --ulimit memlock=-1 \
- --ulimit stack=67108864 \
- -v "$DATA_DIR":/datasets \
- -v "$CHECKPOINT_DIR":/checkpoints/ \
- -v "$RESULT_DIR":/results/ \
- -v $PWD:/code \
- -v $PWD:/workspace/jasper \
- mlperf-rnnt-ref bash
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/download_librispeech.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/download_librispeech.sh
deleted file mode 100644
index ee322fe3043..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/download_librispeech.sh
+++ /dev/null
@@ -1,28 +0,0 @@
-# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-#!/usr/bin/env bash
-
-DATA_SET="LibriSpeech"
-DATA_ROOT_DIR="/datasets"
-DATA_DIR="${DATA_ROOT_DIR}/${DATA_SET}"
-if [ ! -d "$DATA_DIR" ]
-then
- mkdir $DATA_DIR
- chmod go+rx $DATA_DIR
- python utils/download_librispeech.py utils/librispeech.csv $DATA_DIR -e ${DATA_ROOT_DIR}/
-else
- echo "Directory $DATA_DIR already exists."
-fi
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/evaluation.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/evaluation.sh
deleted file mode 100644
index fcd472fd9aa..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/evaluation.sh
+++ /dev/null
@@ -1,92 +0,0 @@
-# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-#!/bin/bash
-echo "Container nvidia build = " $NVIDIA_BUILD_ID
-
-DATA_DIR=${1:-"/datasets/LibriSpeech"}
-DATASET=${2:-"dev-clean"}
-MODEL_CONFIG=${3:-"configs/jasper10x5dr_sp_offline_specaugment.toml"}
-RESULT_DIR=${4:-"/results"}
-CHECKPOINT=$5
-CREATE_LOGFILE=${6:-"true"}
-CUDNN_BENCHMARK=${7:-"false"}
-NUM_GPUS=${8:-1}
-PRECISION=${9:-"fp32"}
-NUM_STEPS=${10:-"-1"}
-SEED=${11:-0}
-BATCH_SIZE=${12:-64}
-
-
-if [ "$CREATE_LOGFILE" = "true" ] ; then
- export GBS=$(expr $BATCH_SIZE \* $NUM_GPUS)
- printf -v TAG "jasper_evaluation_${DATASET}_%s_gbs%d" "$PRECISION" $GBS
- DATESTAMP=`date +'%y%m%d%H%M%S'`
- LOGFILE="${RESULT_DIR}/${TAG}.${DATESTAMP}.log"
- printf "Logs written to %s\n" "$LOGFILE"
-fi
-
-
-
-PREC=""
-if [ "$PRECISION" = "fp16" ] ; then
- PREC="--fp16"
-elif [ "$PRECISION" = "fp32" ] ; then
- PREC=""
-else
- echo "Unknown argument"
- exit -2
-fi
-
-STEPS=""
-if [ "$NUM_STEPS" -gt 0 ] ; then
- STEPS=" --steps $NUM_STEPS"
-fi
-
-if [ "$CUDNN_BENCHMARK" = "true" ] ; then
- CUDNN_BENCHMARK=" --cudnn_benchmark"
-else
- CUDNN_BENCHMARK=""
-fi
-
-
-CMD=" inference.py "
-CMD+=" --batch_size $BATCH_SIZE "
-CMD+=" --dataset_dir $DATA_DIR "
-CMD+=" --val_manifest $DATA_DIR/librispeech-${DATASET}-wav.json "
-CMD+=" --model_toml $MODEL_CONFIG "
-CMD+=" --seed $SEED "
-CMD+=" --ckpt $CHECKPOINT "
-CMD+=" $CUDNN_BENCHMARK"
-CMD+=" $PREC "
-CMD+=" $STEPS "
-
-
-if [ "$NUM_GPUS" -gt 1 ] ; then
- CMD="python3 -m torch.distributed.launch --nproc_per_node=$NUM_GPUS $CMD"
-else
- CMD="python3 $CMD"
-fi
-
-
-set -x
-if [ -z "$LOGFILE" ] ; then
- $CMD
-else
- (
- $CMD
- ) |& tee "$LOGFILE"
-fi
-set +x
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/inference.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/inference.sh
deleted file mode 100644
index 2d4474ce2b7..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/inference.sh
+++ /dev/null
@@ -1,104 +0,0 @@
-# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-#!/bin/bash
-echo "Container nvidia build = " $NVIDIA_BUILD_ID
-
-
-DATA_DIR=${1:-"/datasets/LibriSpeech"}
-DATASET=${2:-"dev-clean"}
-MODEL_CONFIG=${3:-"configs/jasper10x5dr_sp_offline_specaugment.toml"}
-RESULT_DIR=${4:-"/results"}
-CHECKPOINT=$5
-CREATE_LOGFILE=${6:-"true"}
-CUDNN_BENCHMARK=${7:-"false"}
-PRECISION=${8:-"fp32"}
-NUM_STEPS=${9:-"-1"}
-SEED=${10:-0}
-BATCH_SIZE=${11:-64}
-MODELOUTPUT_FILE=${12:-"none"}
-PREDICTION_FILE=${13:-"$RESULT_DIR/${DATASET}.predictions"}
-
-if [ "$CREATE_LOGFILE" = "true" ] ; then
- export GBS=$(expr $BATCH_SIZE)
- printf -v TAG "jasper_inference_${DATASET}_%s_gbs%d" "$PRECISION" $GBS
- DATESTAMP=`date +'%y%m%d%H%M%S'`
- LOGFILE="${RESULT_DIR}/${TAG}.${DATESTAMP}.log"
- printf "Logs written to %s\n" "$LOGFILE"
-fi
-
-
-
-PREC=""
-if [ "$PRECISION" = "fp16" ] ; then
- PREC="--fp16"
-elif [ "$PRECISION" = "fp32" ] ; then
- PREC=""
-else
- echo "Unknown argument"
- exit -2
-fi
-
-PRED=""
-if [ "$PREDICTION_FILE" = "none" ] ; then
- PRED=""
-else
- PRED=" --save_prediction $PREDICTION_FILE"
-fi
-
-OUTPUT=""
-if [ "$MODELOUTPUT_FILE" = "none" ] ; then
- OUTPUT=" "
-else
- OUTPUT=" --logits_save_to $MODELOUTPUT_FILE"
-fi
-
-
-if [ "$CUDNN_BENCHMARK" = "true" ]; then
- CUDNN_BENCHMARK=" --cudnn_benchmark"
-else
- CUDNN_BENCHMARK=""
-fi
-
-STEPS=""
-if [ "$NUM_STEPS" -gt 0 ] ; then
- STEPS=" --steps $NUM_STEPS"
-fi
-
-CMD=" python inference.py "
-CMD+=" --batch_size $BATCH_SIZE "
-CMD+=" --dataset_dir $DATA_DIR "
-CMD+=" --val_manifest $DATA_DIR/librispeech-${DATASET}-wav.json "
-CMD+=" --model_toml $MODEL_CONFIG "
-CMD+=" --seed $SEED "
-CMD+=" --ckpt $CHECKPOINT "
-CMD+=" $CUDNN_BENCHMARK"
-CMD+=" $PRED "
-CMD+=" $OUTPUT "
-CMD+=" $PREC "
-CMD+=" $STEPS "
-
-
-set -x
-if [ -z "$LOGFILE" ] ; then
- $CMD
-else
- (
- $CMD
- ) |& tee "$LOGFILE"
-fi
-set +x
-echo "MODELOUTPUT_FILE: ${MODELOUTPUT_FILE}"
-echo "PREDICTION_FILE: ${PREDICTION_FILE}"
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/inference_benchmark.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/inference_benchmark.sh
deleted file mode 100644
index 7aeea84c159..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/inference_benchmark.sh
+++ /dev/null
@@ -1,84 +0,0 @@
-# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-#!/bin/bash
-
-echo "Container nvidia build = " $NVIDIA_BUILD_ID
-
-
-DATA_DIR=${1:-"/datasets/LibriSpeech"}
-DATASET=${2:-"dev-clean"}
-MODEL_CONFIG=${3:-"configs/jasper10x5dr_sp_offline_specaugment.toml"}
-RESULT_DIR=${4:-"/results"}
-CHECKPOINT=$5
-CREATE_LOGFILE=${6:-"true"}
-CUDNN_BENCHMARK=${7:-"true"}
-PRECISION=${8:-"fp32"}
-NUM_STEPS=${9:-"-1"}
-MAX_DURATION=${10:-"36"}
-SEED=${11:-0}
-BATCH_SIZE=${12:-64}
-
-PREC=""
-if [ "$PRECISION" = "fp16" ] ; then
- PREC="--fp16"
-elif [ "$PRECISION" = "fp32" ] ; then
- PREC=""
-else
- echo "Unknown argument"
- exit -2
-fi
-STEPS=""
-if [ "$NUM_STEPS" -gt 0 ] ; then
- STEPS=" --steps $NUM_STEPS"
-fi
-if [ "$CUDNN_BENCHMARK" = "true" ] ; then
- CUDNN_BENCHMARK=" --cudnn_benchmark"
-else
- CUDNN_BENCHMARK=""
-fi
-
-CMD=" python inference_benchmark.py"
-CMD+=" --batch_size=$BATCH_SIZE"
-CMD+=" --model_toml=$MODEL_CONFIG"
-CMD+=" --seed=$SEED"
-CMD+=" --dataset_dir=$DATA_DIR"
-CMD+=" --val_manifest $DATA_DIR/librispeech-${DATASET}-wav.json "
-CMD+=" --ckpt=$CHECKPOINT"
-CMD+=" --max_duration=$MAX_DURATION"
-CMD+=" --pad_to=-1"
-CMD+=" $CUDNN_BENCHMARK"
-CMD+=" $PREC"
-CMD+=" $STEPS"
-
-
-if [ "$CREATE_LOGFILE" = "true" ] ; then
- export GBS=$(expr $BATCH_SIZE )
- printf -v TAG "jasper_inference_benchmark_%s_gbs%d" "$PRECISION" $GBS
- DATESTAMP=`date +'%y%m%d%H%M%S'`
- LOGFILE="${RESULT_DIR}/${TAG}.${DATESTAMP}.log"
- printf "Logs written to %s\n" "$LOGFILE"
-fi
-
-set -x
-if [ -z "$LOGFILE" ] ; then
- $CMD
-else
- (
- $CMD
- ) |& tee "$LOGFILE"
- grep 'latency' "$LOGFILE"
-fi
-set +x
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/preprocess_librispeech.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/preprocess_librispeech.sh
deleted file mode 100644
index 7cfe5cc6a57..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/preprocess_librispeech.sh
+++ /dev/null
@@ -1,51 +0,0 @@
-# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#!/usr/bin/env bash
-
-python ./utils/convert_librispeech.py \
- --input_dir /datasets/LibriSpeech/train-clean-100 \
- --dest_dir /datasets/LibriSpeech/train-clean-100-wav \
- --output_json /datasets/LibriSpeech/librispeech-train-clean-100-wav.json \
- --speed 0.9 1.1
-python ./utils/convert_librispeech.py \
- --input_dir /datasets/LibriSpeech/train-clean-360 \
- --dest_dir /datasets/LibriSpeech/train-clean-360-wav \
- --output_json /datasets/LibriSpeech/librispeech-train-clean-360-wav.json \
- --speed 0.9 1.1
-python ./utils/convert_librispeech.py \
- --input_dir /datasets/LibriSpeech/train-other-500 \
- --dest_dir /datasets/LibriSpeech/train-other-500-wav \
- --output_json /datasets/LibriSpeech/librispeech-train-other-500-wav.json \
- --speed 0.9 1.1
-
-
-python ./utils/convert_librispeech.py \
- --input_dir /datasets/LibriSpeech/dev-clean \
- --dest_dir /datasets/LibriSpeech/dev-clean-wav \
- --output_json /datasets/LibriSpeech/librispeech-dev-clean-wav.json
-python ./utils/convert_librispeech.py \
- --input_dir /datasets/LibriSpeech/dev-other \
- --dest_dir /datasets/LibriSpeech/dev-other-wav \
- --output_json /datasets/LibriSpeech/librispeech-dev-other-wav.json
-
-
-python ./utils/convert_librispeech.py \
- --input_dir /datasets/LibriSpeech/test-clean \
- --dest_dir /datasets/LibriSpeech/test-clean-wav \
- --output_json /datasets/LibriSpeech/librispeech-test-clean-wav.json
-python ./utils/convert_librispeech.py \
- --input_dir /datasets/LibriSpeech/test-other \
- --dest_dir /datasets/LibriSpeech/test-other-wav \
- --output_json /datasets/LibriSpeech/librispeech-test-other-wav.json
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/train.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/train.sh
deleted file mode 100644
index d59ce8ebeb2..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/train.sh
+++ /dev/null
@@ -1,113 +0,0 @@
-# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
-# Copyright (c) 2019, Myrtle Software Limited. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-#!/bin/bash
-echo "Container nvidia build = " $NVIDIA_BUILD_ID
-
-DATA_DIR=${1:-"/datasets/LibriSpeech"}
-MODEL_CONFIG=${2:-"configs/rnnt.toml"}
-RESULT_DIR=${3:-"/results"}
-CHECKPOINT=${4:-"none"}
-CREATE_LOGFILE=${5:-"true"}
-CUDNN_BENCHMARK=${6:-"true"}
-NUM_GPUS=${7:-8}
-PRECISION=${8:-"fp16"}
-EPOCHS=${9:-100}
-SEED=${10:-6}
-BATCH_SIZE=${11:-8}
-EVAL_BATCH_SIZE=${12:-2}
-LEARNING_RATE=${13:-"0.001"}
-LEARNING_RATE_WARMUP=${14:-"8000"}
-GRADIENT_ACCUMULATION_STEPS=${15:-1}
-LAUNCH_OPT=${LAUNCH_OPT:-"none"}
-
-
-PREC=""
-if [ "$PRECISION" = "fp16" ] ; then
- PREC="--fp16"
-elif [ "$PRECISION" = "fp32" ] ; then
- PREC=""
-else
- echo "Unknown argument"
- exit -2
-fi
-
-CUDNN=""
-if [ "$CUDNN_BENCHMARK" = "true" ] && [ "$PRECISION" = "fp16" ]; then
- CUDNN=" --cudnn"
-else
- CUDNN=""
-fi
-
-
-
-if [ "$CHECKPOINT" = "none" ] ; then
- CHECKPOINT=""
-else
- CHECKPOINT=" --ckpt=${CHECKPOINT}"
-fi
-
-
-CMD=" train.py"
-CMD+=" --batch_size=$BATCH_SIZE"
-CMD+=" --eval_batch_size=$EVAL_BATCH_SIZE"
-CMD+=" --num_epochs=$EPOCHS"
-CMD+=" --output_dir=$RESULT_DIR"
-CMD+=" --model_toml=$MODEL_CONFIG"
-CMD+=" --lr=$LEARNING_RATE"
-CMD+=" --lr_warmup=$LEARNING_RATE_WARMUP"
-CMD+=" --seed=$SEED"
-CMD+=" --optimizer=adam"
-CMD+=" --dataset_dir=$DATA_DIR"
-CMD+=" --val_manifest=$DATA_DIR/librispeech-dev-clean-wav.json"
-CMD+=" --train_manifest=$DATA_DIR/librispeech-train-clean-100-wav.json,$DATA_DIR/librispeech-train-clean-360-wav.json,$DATA_DIR/librispeech-train-other-500-wav.json"
-CMD+=" --weight_decay=1e-3"
-CMD+=" --save_freq=100"
-CMD+=" --eval_freq=1"
-CMD+=" --train_freq=250"
-CMD+=" --lr_decay"
-CMD+=" --gradient_accumulation_steps=$GRADIENT_ACCUMULATION_STEPS "
-CMD+=" $CHECKPOINT"
-CMD+=" $PREC"
-CMD+=" $CUDNN"
-
-
-if [ "${LAUNCH_OPT}" != "none" ]; then
- CMD="python -m $LAUNCH_OPT $CMD"
-elif [ "$NUM_GPUS" -gt 1 ] ; then
- CMD="python3 -m multiproc --nproc_per_node=$NUM_GPUS $CMD"
-else
- CMD="python3 $CMD"
-fi
-
-
-if [ "$CREATE_LOGFILE" = "true" ] ; then
- export GBS=$(expr $BATCH_SIZE \* $NUM_GPUS)
- printf -v TAG "rnnt_train_%s_gbs%d" "$PRECISION" $GBS
- DATESTAMP=`date +'%y%m%d%H%M%S'`
- LOGFILE=$RESULT_DIR/$TAG.$DATESTAMP.log
- printf "Logs written to %s\n" "$LOGFILE"
-fi
-
-set -x
-if [ -z "$LOGFILE" ] ; then
- $CMD
-else
- (
- $CMD
- ) |& tee $LOGFILE
-fi
-set +x
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/train_benchmark.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/train_benchmark.sh
deleted file mode 100644
index 7b5a33705ca..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/scripts/train_benchmark.sh
+++ /dev/null
@@ -1,130 +0,0 @@
-# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#!/bin/bash
-
-echo "Container nvidia build = " $NVIDIA_BUILD_ID
-
-DATA_DIR=${1:-"/datasets/LibriSpeech"}
-MODEL_CONFIG=${2:-"configs/jasper10x5dr_sp_offline_specaugment.toml"}
-RESULT_DIR=${3:-"/results"}
-CREATE_LOGFILE=${4:-"true"}
-CUDNN_BENCHMARK=${5:-"true"}
-NUM_GPUS=${6:-8}
-PRECISION=${7:-"fp16"}
-NUM_STEPS=${8:-"-1"}
-MAX_DURATION=${9:-16.7}
-SEED=${10:-0}
-BATCH_SIZE=${11:-64}
-LEARNING_RATE=${12:-"0.015"}
-GRADIENT_ACCUMULATION_STEPS=${13:-1}
-PRINT_FREQUENCY=${14:-1}
-
-
-PREC=""
-if [ "$PRECISION" = "fp16" ] ; then
- PREC=" --fp16"
-elif [ "$PRECISION" = "fp32" ] ; then
- PREC=""
-else
- echo "Unknown argument"
- exit -2
-fi
-
-STEPS=""
-if [ "$NUM_STEPS" -ne "-1" ] ; then
- STEPS=" --num_steps=$NUM_STEPS"
-elif [ "$NUM_STEPS" = "-1" ] ; then
- STEPS=""
-else
- echo "Unknown argument"
- exit -2
-fi
-
-CUDNN=""
-if [ "$CUDNN_BENCHMARK" = "true" ] ; then
- CUDNN=" --cudnn"
-else
- CUDNN=""
-fi
-
-
-CMD=" train.py"
-CMD+=" --batch_size=$BATCH_SIZE"
-CMD+=" --num_epochs=400"
-CMD+=" --output_dir=$RESULT_DIR"
-CMD+=" --model_toml=$MODEL_CONFIG"
-CMD+=" --lr=$LEARNING_RATE"
-CMD+=" --seed=$SEED"
-CMD+=" --optimizer=novograd"
-CMD+=" --gradient_accumulation_steps=$GRADIENT_ACCUMULATION_STEPS"
-CMD+=" --dataset_dir=$DATA_DIR"
-CMD+=" --val_manifest=$DATA_DIR/librispeech-dev-clean-wav.json"
-CMD+=" --train_manifest=$DATA_DIR/librispeech-train-clean-100-wav.json,$DATA_DIR/librispeech-train-clean-360-wav.json,$DATA_DIR/librispeech-train-other-500-wav.json"
-CMD+=" --weight_decay=1e-3"
-CMD+=" --save_freq=100000"
-CMD+=" --eval_freq=100000"
-CMD+=" --max_duration=$MAX_DURATION"
-CMD+=" --pad_to_max"
-CMD+=" --train_freq=$PRINT_FREQUENCY"
-CMD+=" --lr_decay"
-CMD+=" $CUDNN"
-CMD+=" $PREC"
-CMD+=" $STEPS"
-
-if [ "$NUM_GPUS" -gt 1 ] ; then
- CMD="python3 -m torch.distributed.launch --nproc_per_node=$NUM_GPUS $CMD"
-else
- CMD="python3 $CMD"
-fi
-
-
-if [ "$CREATE_LOGFILE" = "true" ] ; then
- export GBS=$(expr $BATCH_SIZE \* $NUM_GPUS)
- printf -v TAG "jasper_train_benchmark_%s_gbs%d" "$PRECISION" $GBS
- DATESTAMP=`date +'%y%m%d%H%M%S'`
- LOGFILE="${RESULT_DIR}/${TAG}.${DATESTAMP}.log"
- printf "Logs written to %s\n" "$LOGFILE"
-
-fi
-
-if [ -z "$LOGFILE" ] ; then
-
- set -x
- $CMD
- set +x
-else
-
- set -x
- (
- $CMD
- ) |& tee "$LOGFILE"
-
- set +x
-
- mean_latency=`cat "$LOGFILE" | grep 'Step time' | awk '{print $3}' | tail -n +2 | egrep -o '[0-9.]+'| awk 'BEGIN {total=0} {total+=$1} END {printf("%.2f\n",total/NR)}'`
- mean_throughput=`python -c "print($BATCH_SIZE*$NUM_GPUS/${mean_latency})"`
-  training_wer_per_gpu=`cat "$LOGFILE" | grep 'training_batch_WER'| awk '{print $2}' | tail -n 1 | egrep -o '[0-9.]+'`
-  training_loss_per_gpu=`cat "$LOGFILE" | grep 'Loss@Step'| awk '{print $4}' | tail -n 1 | egrep -o '[0-9.]+'`
- final_eval_wer=`cat "$LOGFILE" | grep 'Evaluation WER'| tail -n 1 | egrep -o '[0-9.]+'`
- final_eval_loss=`cat "$LOGFILE" | grep 'Evaluation Loss'| tail -n 1 | egrep -o '[0-9.]+'`
-
- echo "max duration: $MAX_DURATION s" | tee -a "$LOGFILE"
- echo "mean_latency: $mean_latency s" | tee -a "$LOGFILE"
- echo "mean_throughput: $mean_throughput sequences/s" | tee -a "$LOGFILE"
- echo "training_wer_per_pgu: $training_wer_per_pgu" | tee -a "$LOGFILE"
- echo "training_loss_per_pgu: $training_loss_per_pgu" | tee -a "$LOGFILE"
- echo "final_eval_loss: $final_eval_loss" | tee -a "$LOGFILE"
- echo "final_eval_wer: $final_eval_wer" | tee -a "$LOGFILE"
-fi
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/utils/__init__.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/utils/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/utils/convert_librispeech.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/utils/convert_librispeech.py
deleted file mode 100644
index 09ce9a3a4ed..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/utils/convert_librispeech.py
+++ /dev/null
@@ -1,90 +0,0 @@
-#!/usr/bin/env python
-# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-import argparse
-import os
-import glob
-import multiprocessing
-import json
-
-import pandas as pd
-
-from preprocessing_utils import parallel_preprocess
-
-parser = argparse.ArgumentParser(description='Preprocess LibriSpeech.')
-parser.add_argument('--input_dir', type=str, required=True,
- help='LibriSpeech collection input dir')
-parser.add_argument('--dest_dir', type=str, required=True,
- help='Output dir')
-parser.add_argument('--dest_list', type=str, required=False,
-                    help='a file containing the list of files to be converted')
-parser.add_argument('--output_json', type=str, default='./',
- help='name of the output json file.')
-parser.add_argument('-s', '--speed', type=float, nargs='*',
- help='Speed perturbation ratio')
-parser.add_argument('--target_sr', type=int, default=None,
- help='Target sample rate. '
- 'defaults to the input sample rate')
-parser.add_argument('--overwrite', action='store_true',
- help='Overwrite file if exists')
-parser.add_argument('--parallel', type=int, default=multiprocessing.cpu_count(),
- help='Number of threads to use when processing audio files')
-args = parser.parse_args()
-
-args.input_dir = args.input_dir.rstrip('/')
-args.dest_dir = args.dest_dir.rstrip('/')
-
-
-def build_input_arr(input_dir):
- txt_files = glob.glob(os.path.join(input_dir, '**', '*.trans.txt'),
- recursive=True)
- input_data = []
- for txt_file in txt_files:
- rel_path = os.path.relpath(txt_file, input_dir)
- with open(txt_file) as fp:
- for line in fp:
- fname, _, transcript = line.partition(' ')
- input_data.append(dict(input_relpath=os.path.dirname(rel_path),
- input_fname=fname + '.flac',
- transcript=transcript))
- return input_data
-
-
-print("[%s] Scaning input dir..." % args.output_json)
-dataset = build_input_arr(input_dir=args.input_dir)
-
-if args.dest_list is not None:
-    with open(args.dest_list, "r") as dest_file:
-        dest_list = dest_file.readlines()
-else:
-    dest_list = None
-print("[%s] Converting audio files..." % args.output_json)
-dataset = parallel_preprocess(dataset=dataset,
- input_dir=args.input_dir,
- dest_dir=args.dest_dir,
- dest_list=dest_list,
- target_sr=args.target_sr,
- speed=args.speed,
- overwrite=args.overwrite,
- parallel=args.parallel)
-
-print("[%s] Generating json..." % args.output_json)
-df = pd.DataFrame(dataset, dtype=object)
-
-# Save json with python. df.to_json() produces backslashes in file paths
-dataset = df.to_dict(orient='records')
-with open(args.output_json, 'w') as fp:
- json.dump(dataset, fp, indent=2)
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/utils/download_librispeech.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/utils/download_librispeech.py
deleted file mode 100644
index f7e5eda1309..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/utils/download_librispeech.py
+++ /dev/null
@@ -1,76 +0,0 @@
-#!/usr/bin/env python
-# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-import os
-import argparse
-import pandas as pd
-
-from download_utils import download_file, md5_checksum, extract
-
-parser = argparse.ArgumentParser(
- description='Download, verify and extract dataset files')
-parser.add_argument('csv', type=str,
- help='CSV file with urls and checksums to download.')
-parser.add_argument('dest', type=str,
-                    help='Download destination folder.')
-parser.add_argument('-e', type=str, default=None,
-                    help='Extraction destination folder. Defaults to download folder if not provided')
-parser.add_argument('--skip_download', action='store_true',
- help='Skip downloading the files')
-parser.add_argument('--skip_checksum', action='store_true',
- help='Skip checksum')
-parser.add_argument('--skip_extract', action='store_true',
- help='Skip extracting files')
-args = parser.parse_args()
-args.e = args.e or args.dest
-
-
-df = pd.read_csv(args.csv, delimiter=',')
-
-
-if not args.skip_download:
- for url in df.url:
- fname = url.split('/')[-1]
- print("Downloading %s:" % fname)
- download_file(url=url, dest_folder=args.dest, fname=fname)
-else:
- print("Skipping file download")
-
-
-if not args.skip_checksum:
- for index, row in df.iterrows():
- url = row['url']
- md5 = row['md5']
- fname = url.split('/')[-1]
- fpath = os.path.join(args.dest, fname)
- print("Verifing %s: " % fname, end='')
- ret = md5_checksum(fpath=fpath, target_hash=md5)
- if not ret:
- raise ValueError(f"Checksum for {fname} failed!")
- else:
- print(f"Checksum correct for {fname}")
-else:
- print("Skipping checksum")
-
-
-if not args.skip_extract:
- for url in df.url:
- fname = url.split('/')[-1]
- fpath = os.path.join(args.dest, fname)
- print("Decompressing %s:" % fpath)
- extract(fpath=fpath, dest_folder=args.e)
-else:
- print("Skipping file extraction")
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/utils/download_utils.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/utils/download_utils.py
deleted file mode 100644
index bda4193fbb0..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/utils/download_utils.py
+++ /dev/null
@@ -1,69 +0,0 @@
-#!/usr/bin/env python
-# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-import hashlib
-import requests
-import os
-import tarfile
-import tqdm
-
-
-def download_file(url, dest_folder, fname, overwrite=False):
- fpath = os.path.join(dest_folder, fname)
- if os.path.isfile(fpath):
- if overwrite:
- print("Overwriting existing file")
- else:
- print("File exists, skipping download.")
- return
-
- tmp_fpath = fpath + '.tmp'
-
- r = requests.get(url, stream=True)
- file_size = int(r.headers['Content-Length'])
- chunk_size = 1024 * 1024 # 1MB
- total_chunks = int(file_size / chunk_size)
-
- with open(tmp_fpath, 'wb') as fp:
- content_iterator = r.iter_content(chunk_size=chunk_size)
- chunks = tqdm.tqdm(content_iterator, total=total_chunks,
- unit='MB', desc=fpath, leave=True)
- for chunk in chunks:
- fp.write(chunk)
-
- os.rename(tmp_fpath, fpath)
-
-
-def md5_checksum(fpath, target_hash):
- file_hash = hashlib.md5()
- with open(fpath, "rb") as fp:
- for chunk in iter(lambda: fp.read(1024 * 1024), b""):
- file_hash.update(chunk)
- return file_hash.hexdigest() == target_hash
-
-
-def extract(fpath, dest_folder):
- if fpath.endswith('.tar.gz'):
- mode = 'r:gz'
- elif fpath.endswith('.tar'):
- mode = 'r:'
- else:
-        raise IOError('fpath has unknown extension: %s' % fpath)
-
- with tarfile.open(fpath, mode) as tar:
- members = tar.getmembers()
- for member in tqdm.tqdm(iterable=members, total=len(members), leave=True):
- tar.extract(path=dest_folder, member=member)
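For orientation, the three helpers above chain together as follows; the destination directory is arbitrary and the URL/MD5 pair is taken from the LibriSpeech CSVs below:

```
import os

# Assumes the deleted download_utils.py is importable on the path.
from download_utils import download_file, md5_checksum, extract

url = "http://www.openslr.org/resources/12/dev-clean.tar.gz"
dest = "/datasets/downloads"   # example location
os.makedirs(dest, exist_ok=True)

download_file(url=url, dest_folder=dest, fname="dev-clean.tar.gz")
if md5_checksum(fpath=os.path.join(dest, "dev-clean.tar.gz"),
                target_hash="42e2234ba48799c1f50f24a7926300a1"):
    extract(fpath=os.path.join(dest, "dev-clean.tar.gz"), dest_folder="/datasets")
```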
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/utils/inference_librispeech.csv b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/utils/inference_librispeech.csv
deleted file mode 100644
index 40dac4e0e61..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/utils/inference_librispeech.csv
+++ /dev/null
@@ -1,5 +0,0 @@
-url,md5
-http://www.openslr.org/resources/12/dev-clean.tar.gz,42e2234ba48799c1f50f24a7926300a1
-http://www.openslr.org/resources/12/dev-other.tar.gz,c8d0bcc9cca99d4f8b62fcc847357931
-http://www.openslr.org/resources/12/test-clean.tar.gz,32fa31d27d2e1cad72775fee3f4849a9
-http://www.openslr.org/resources/12/test-other.tar.gz,fb5a50374b501bb3bac4815ee91d3135
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/utils/librispeech-inference.csv b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/utils/librispeech-inference.csv
deleted file mode 100644
index b5e43b222e6..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/utils/librispeech-inference.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-url,md5
-http://www.openslr.org/resources/12/dev-clean.tar.gz,42e2234ba48799c1f50f24a7926300a1
\ No newline at end of file
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/utils/librispeech.csv b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/utils/librispeech.csv
deleted file mode 100644
index d48a9f8db72..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/utils/librispeech.csv
+++ /dev/null
@@ -1,8 +0,0 @@
-url,md5
-http://www.openslr.org/resources/12/dev-clean.tar.gz,42e2234ba48799c1f50f24a7926300a1
-http://www.openslr.org/resources/12/dev-other.tar.gz,c8d0bcc9cca99d4f8b62fcc847357931
-http://www.openslr.org/resources/12/test-clean.tar.gz,32fa31d27d2e1cad72775fee3f4849a9
-http://www.openslr.org/resources/12/test-other.tar.gz,fb5a50374b501bb3bac4815ee91d3135
-http://www.openslr.org/resources/12/train-clean-100.tar.gz,2a93770f6d5c6c964bc36631d331a522
-http://www.openslr.org/resources/12/train-clean-360.tar.gz,c0e676e450a7ff2f54aeade5171606fa
-http://www.openslr.org/resources/12/train-other-500.tar.gz,d1a0fd59409feb2c614ce4d30c387708
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/utils/preprocessing_utils.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/utils/preprocessing_utils.py
deleted file mode 100644
index e0ce22b9a13..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch/utils/preprocessing_utils.py
+++ /dev/null
@@ -1,86 +0,0 @@
-#!/usr/bin/env python
-# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-import multiprocessing
-import functools
-
-import sox
-
-
-from tqdm import tqdm
-
-
-def preprocess(data, input_dir, dest_dir, dest_list, target_sr=None, speed=None,
- overwrite=True):
- speed = speed or []
- speed.append(1)
-    speed = list(set(speed)) # Make unique
-
- input_fname = os.path.join(input_dir,
- data['input_relpath'],
- data['input_fname'])
- input_sr = sox.file_info.sample_rate(input_fname)
- target_sr = target_sr or input_sr
-
- os.makedirs(os.path.join(dest_dir, data['input_relpath']), exist_ok=True)
-
- output_dict = {}
- output_dict['transcript'] = data['transcript'].lower().strip()
- output_dict['files'] = []
-
- fname = os.path.splitext(data['input_fname'])[0]
- for s in speed:
- output_fname = fname + \
- '{}.wav'.format('' if s == 1 else '-{}'.format(s))
- output_fpath = os.path.join(dest_dir,
- data['input_relpath'],
- output_fname)
- output_rel_fpath = os.path.join('train-clean-100-wav',
- data['input_relpath'],
- output_fname+"\n")
-
- if dest_list != None and not output_rel_fpath in dest_list:
- return None
- if not os.path.exists(output_fpath) or overwrite:
- cbn = sox.Transformer().speed(factor=s).convert(target_sr)
- cbn.build(input_fname, output_fpath)
-
- file_info = sox.file_info.info(output_fpath)
- file_info['fname'] = os.path.join(os.path.basename(dest_dir),
- data['input_relpath'],
- output_fname)
- file_info['speed'] = s
- output_dict['files'].append(file_info)
-
- if s == 1:
- file_info = sox.file_info.info(output_fpath)
- output_dict['original_duration'] = file_info['duration']
- output_dict['original_num_samples'] = file_info['num_samples']
-
- return output_dict
-
-
-def parallel_preprocess(dataset, input_dir, dest_dir, dest_list, target_sr, speed, overwrite, parallel):
- with multiprocessing.Pool(parallel) as p:
- func = functools.partial(preprocess,
- input_dir=input_dir, dest_dir=dest_dir, dest_list=dest_list,
- target_sr=target_sr, speed=speed, overwrite=overwrite)
- dataset = list(tqdm(p.imap(func, dataset), total=len(dataset)))
- result = []
- for data in dataset:
- if data != None:
- result.append(data)
- return result
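Each record returned by `parallel_preprocess` (and written to the manifest JSON by convert_librispeech.py) has roughly the shape sketched below; the concrete values are fabricated and the per-file keys come from `sox.file_info.info()`:

```
# Illustrative manifest record (values are made up).
record = {
    "transcript": "mister quilter is the apostle of the middle classes",
    "original_duration": 5.855,        # seconds, taken from the speed==1 copy
    "original_num_samples": 93680,
    "files": [
        {
            "fname": "dev-clean-wav/1272/128104/1272-128104-0000.wav",
            "speed": 1,                # speed-perturbation factor
            "duration": 5.855,         # remaining keys come from sox.file_info.info()
            "num_samples": 93680,
            "sample_rate": 16000.0,
        },
    ],
}
```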
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch_SUT.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch_SUT.py
deleted file mode 100644
index f054d5da6b7..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/pytorch_SUT.py
+++ /dev/null
@@ -1,159 +0,0 @@
-# copyright (c) 2020, Cerebras Systems, Inc. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import sys
-import os
-sys.path.insert(0, os.path.join(os.getcwd(), "pytorch"))
-
-import array
-import numpy as np
-import toml
-import mlperf_loadgen as lg
-
-from QSL import AudioQSL, AudioQSLInMemory
-from helpers import add_blank_label
-
-import torch
-
-
-def load_and_migrate_checkpoint(ckpt_path):
- checkpoint = torch.load(ckpt_path, map_location="cpu")
- migrated_state_dict = {}
- for key, value in checkpoint['state_dict'].items():
- key = key.replace("joint_net", "joint.net")
- migrated_state_dict[key] = value
- del migrated_state_dict["audio_preprocessor.featurizer.fb"]
- del migrated_state_dict["audio_preprocessor.featurizer.window"]
- return migrated_state_dict
-
-
-class PytorchSUT:
- def __init__(self, config_toml, checkpoint_path, dataset_dir, manifest_filepath,
- perf_count, bf16=False, int8=False, configure_file=""):
- self.bf16 = bf16
- self.int8 = int8
- self.configure_file = configure_file
- config = toml.load(config_toml)
-
- dataset_vocab = config['labels']['labels']
- featurizer_config = config['input_eval']
-
- self.sut = lg.ConstructSUT(self.issue_queries, self.flush_queries,
- self.process_latencies)
- self.qsl = AudioQSLInMemory(dataset_dir,
- manifest_filepath,
- dataset_vocab,
- featurizer_config["sample_rate"],
- perf_count)
-
- from decoders import ScriptGreedyDecoder
- from model_separable_rnnt import RNNT
- from preprocessing import AudioPreprocessing
- rnnt_vocab = add_blank_label(dataset_vocab)
- self.audio_preprocessor = AudioPreprocessing(**featurizer_config)
- self.audio_preprocessor.eval()
- self.audio_preprocessor = torch.jit.script(self.audio_preprocessor)
- self.audio_preprocessor = torch.jit._recursive.wrap_cpp_module(
- torch._C._freeze_module(self.audio_preprocessor._c))
-
- model = RNNT(
- feature_config=featurizer_config,
- rnnt=config['rnnt'],
- num_classes=len(rnnt_vocab)
- )
- model.load_state_dict(load_and_migrate_checkpoint(checkpoint_path),
- strict=True)
-
- import intel_pytorch_extension as ipex
- if self.bf16:
- ipex.enable_auto_mixed_precision(mixed_dtype=torch.bfloat16)
- ipex.core.enable_auto_dnnl()
- model = model.to(ipex.DEVICE)
-
- model.eval()
- """
- if not self.ipex:
- model.encoder = torch.jit.script(model.encoder)
- model.encoder = torch.jit._recursive.wrap_cpp_module(
- torch._C._freeze_module(model.encoder._c))
- model.prediction = torch.jit.script(model.prediction)
- model.prediction = torch.jit._recursive.wrap_cpp_module(
- torch._C._freeze_module(model.prediction._c))
- """
- model.joint = torch.jit.script(model.joint)
- model.joint = torch.jit._recursive.wrap_cpp_module(
- torch._C._freeze_module(model.joint._c))
- """
- if not self.ipex:
- model = torch.jit.script(model)
- """
-
- self.greedy_decoder = ScriptGreedyDecoder(len(rnnt_vocab) - 1, model)
-
- def issue_queries(self, query_samples):
- import intel_pytorch_extension as ipex
- conf = None
- if self.int8:
- conf = ipex.AmpConf(torch.int8, self.configure_file)
- for query_sample in query_samples:
- waveform = self.qsl[query_sample.index]
- assert waveform.ndim == 1
- waveform_length = np.array(waveform.shape[0], dtype=np.int64)
- waveform = np.expand_dims(waveform, 0)
- waveform_length = np.expand_dims(waveform_length, 0)
- with torch.no_grad():
- waveform = torch.from_numpy(waveform)
- waveform_length = torch.from_numpy(waveform_length)
- feature, feature_length = self.audio_preprocessor.forward((waveform, waveform_length))
- assert feature.ndim == 3
- assert feature_length.ndim == 1
- # RNNT can run in the following precision combinations:
- # encoder | decoder | --bf16 | --int8
- # --------------+-----------+-----------+---------
- # FP32 | FP32 | False | False
- # BF16 | BF16 | True | False
- # INT8 | BF16 | True | True
- # INT8 | FP32 | False | True
- if self.bf16 and not self.int8:
- # set bf16 mode globally for both encoder and decoder
- ipex.enable_auto_mixed_precision(mixed_dtype=torch.bfloat16)
- ipex.core.enable_auto_dnnl()
- feature = feature.to(ipex.DEVICE)
- feature_length = feature_length.to(ipex.DEVICE)
- feature = feature.permute(2, 0, 1)
-
- # _, _, transcript = self.greedy_decoder.forward(feature, feature_length)
- _, _, transcript, dur_enc, dur_dec = self.greedy_decoder.forward_single_batch(feature, feature_length, conf, self.int8, self.bf16)
-
- assert len(transcript) == 1
- response_array = array.array('q', transcript[0])
- bi = response_array.buffer_info()
- response = lg.QuerySampleResponse(query_sample.id, bi[0],
- bi[1] * response_array.itemsize)
- lg.QuerySamplesComplete([response])
-
- def flush_queries(self):
- pass
-
- def process_latencies(self, latencies_ns):
- print("Average latency (ms) per query:")
- print(np.mean(latencies_ns)/1000000.0)
- print("Median latency (ms): ")
- print(np.percentile(latencies_ns, 50)/1000000.0)
- print("90 percentile latency (ms): ")
- print(np.percentile(latencies_ns, 90)/1000000.0)
-
- def __del__(self):
- lg.DestroySUT(self.sut)
- print("Finished destroying SUT.")
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/run.py b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/run.py
deleted file mode 100644
index f82c651aa21..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/run.py
+++ /dev/null
@@ -1,198 +0,0 @@
-# Copyright 2020 The MLPerf Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# =============================================================================
-
-import argparse
-import mlperf_loadgen as lg
-import subprocess
-
-import os
-from pathlib import Path
-import re
-import toml
-import torch
-
-import numpy as np
-from numpy.core.numeric import full
-from pytorch_SUT import PytorchSUT
-
-MLPERF_CONF = Path(os.path.dirname(os.path.realpath(__file__))) / "./mlperf.conf"
-MLPERF_CONF = MLPERF_CONF.resolve()
-
-
-def get_args():
- parser = argparse.ArgumentParser()
- parser.add_argument("--backend", choices=["pytorch"], default="pytorch", help="Backend")
- parser.add_argument("--scenario", choices=["SingleStream", "Offline", "Server"], default="Offline", help="Scenario")
- # parser.add_argument("--accuracy", action="store_true", help="enable accuracy pass")
- parser.add_argument("--mlperf_conf", default=str(MLPERF_CONF), help="mlperf rules config")
- parser.add_argument("--user_conf", default="user.conf", help="user config for user LoadGen settings such as target QPS")
- parser.add_argument("--pytorch_config_toml", default="pytorch/configs/rnnt.toml")
- parser.add_argument("--pytorch_checkpoint", required=True)
- parser.add_argument("--dataset_dir", required=True)
- parser.add_argument("--manifest", required=True)
- parser.add_argument("--perf_count", type=int, default=None)
- parser.add_argument("--profile", choices=["True", "Split", "False"], default="False")
- parser.add_argument("--bf16", dest='bf16', action='store_true')
- parser.add_argument("--int8", dest='int8', action='store_true')
- parser.add_argument("--log_dir", required=True)
- parser.add_argument("--configure_path", default="")
- parser.add_argument('--tune', dest='tune', action='store_true',
- help='tune best int8 model with Neural Compressor on calibration dataset')
- parser.add_argument('--benchmark', dest='benchmark', action='store_true',
- help='run benchmark')
- parser.add_argument("--accuracy_only", dest='accuracy_only', action='store_true',
- help='For accuracy measurement only.')
- parser.add_argument("--tuned_checkpoint", default='./saved_results', type=str, metavar='PATH',
- help='path to checkpoint tuned by Neural Compressor (default: ./)')
- args = parser.parse_args()
- return args
-
-
-scenario_map = {
- "SingleStream": lg.TestScenario.SingleStream,
- "Offline": lg.TestScenario.Offline,
- "Server": lg.TestScenario.Server,
-}
-
-
-def main():
- args = get_args()
- print ("Checking args: int8={}, bf16={}".format(args.int8, args.bf16))
- print(args)
-
- settings = lg.TestSettings()
- settings.scenario = scenario_map[args.scenario]
- settings.FromConfig(args.mlperf_conf, "rnnt", args.scenario)
- settings.FromConfig(args.user_conf, "rnnt", args.scenario)
-
- if args.accuracy_only:
- settings.mode = lg.TestMode.AccuracyOnly
- else:
- settings.mode = lg.TestMode.PerformanceOnly
-
- log_path = args.log_dir
- os.makedirs(log_path, exist_ok=True)
- log_output_settings = lg.LogOutputSettings()
- log_output_settings.outdir = log_path
- log_output_settings.copy_summary_to_stdout = True
- log_settings = lg.LogSettings()
- log_settings.log_output = log_output_settings
-
-    pattern = [r'accuracy=\d+.\d+', r'samples_per_query : \d+', r'Mean latency.*', r'Samples per second\s*: \d+.\d+']
-
- def eval_func(model):
- print("Running Loadgen test...")
- fullpath = None
- use_int8 = False
- settings.mode = lg.TestMode.AccuracyOnly
- for path, dirs, files in os.walk('nc_workspace'):
- if 'ipex_config_tmp.json' in files:
- fullpath = os.path.join(path, 'ipex_config_tmp.json')
- use_int8 = True
- break
- sut = PytorchSUT(args.pytorch_config_toml, args.pytorch_checkpoint,
- args.dataset_dir, args.manifest, args.perf_count,
- args.bf16, use_int8, fullpath)
- lg.StartTestWithLogSettings(sut.sut, sut.qsl.qsl, settings, log_settings)
- cmd = f"python3 accuracy_eval.py --log_dir {log_path} \
- --dataset_dir {args.dataset_dir} --manifest {args.manifest}"
- out = subprocess.check_output(cmd, shell=True)
- out = out.decode()
- regex_accu = re.compile(pattern[0])
- accu = float(regex_accu.findall(out)[0].split('=')[1])
- print('Accuracy: %.3f ' % (accu))
- return accu
-
- if args.tune:
- import shutil
- shutil.rmtree('nc_workspace', ignore_errors=True)
- sut = PytorchSUT(args.pytorch_config_toml, args.pytorch_checkpoint,
- args.dataset_dir, args.manifest, args.perf_count,
- True, False, None)
- model = sut.greedy_decoder._model.encoder
-
- class NC_dataloader(object):
- def __init__(self, sut):
- self.sut = sut
- self.batch_size = 1
-
- def __iter__(self):
- for i in range(0, self.sut.qsl.count, self.batch_size):
- waveform = self.sut.qsl[i]
- assert waveform.ndim == 1
- waveform_length = np.array(waveform.shape[0], dtype=np.int64)
- waveform = np.expand_dims(waveform, 0)
- waveform_length = np.expand_dims(waveform_length, 0)
- with torch.no_grad():
- waveform = torch.from_numpy(waveform)
- waveform_length = torch.from_numpy(waveform_length)
- feature, feature_length = self.sut.audio_preprocessor.forward((waveform, waveform_length))
- assert feature.ndim == 3
- assert feature_length.ndim == 1
- feature = feature.permute(2, 0, 1)
- yield (feature, feature_length), None
-
- from neural_compressor.experimental import Quantization, common
- calib_dataloader = NC_dataloader(sut)
- quantizer = Quantization("./conf.yaml")
- quantizer.model = common.Model(model)
- quantizer.calib_dataloader = calib_dataloader
- quantizer.eval_func = eval_func
- q_model = quantizer.fit()
- q_model.save(args.tuned_checkpoint)
- return
-
- if args.backend == "pytorch":
- config_file = None
- if args.int8:
- config_file = os.path.join(args.tuned_checkpoint, "best_configure.json")
-            assert os.path.exists(config_file), "There is no IPEX config file. Please tune with Neural Compressor first!"
- sut = PytorchSUT(args.pytorch_config_toml, args.pytorch_checkpoint,
- args.dataset_dir, args.manifest, args.perf_count,
- args.bf16, args.int8, config_file)
- else:
- raise ValueError("Unknown backend: {:}".format(args.backend))
-
- print("Running Loadgen test...")
- lg.StartTestWithLogSettings(sut.sut, sut.qsl.qsl, settings, log_settings)
-
- if args.accuracy_only:
- cmd = f"python3 accuracy_eval.py --log_dir {log_path} --dataset_dir {args.dataset_dir} --manifest {args.manifest}"
- print(f"Running accuracy script: {cmd}")
- out = subprocess.check_output(cmd, shell=True)
- out = out.decode()
- regex_accu = re.compile(pattern[0])
- accu = float(regex_accu.findall(out)[0].split('=')[1])
- print('Accuracy: %.3f ' % (accu))
- else:
- file_path = os.path.join(log_path, 'mlperf_log_summary.txt')
- f = open(file_path, 'r', encoding='UTF-8')
- file_content = f.read()
- f.close()
- regex_batch = re.compile(pattern[1])
- regex_late = re.compile(pattern[2])
- regex_perf = re.compile(pattern[3], flags=re.IGNORECASE)
- latency_per_sample = float(regex_late.findall(file_content)[0].split(': ')[1])
- samples_per_s = float(regex_perf.findall(file_content)[0].split(': ')[1])
- print('Batch size = %d' % 1)
- print('Latency: %.3f ms' % (latency_per_sample / 10**6))
- print('Throughput: %.3f samples/sec' % (samples_per_s))
-
- print("Done!")
-
-
-
-if __name__ == "__main__":
- main()
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/run.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/run.sh
deleted file mode 100644
index 1044e6d4662..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/run.sh
+++ /dev/null
@@ -1,91 +0,0 @@
-#!/bin/bash
-
-set -euo pipefail
-
-root_dir=`pwd`
-work_dir=$root_dir/mlperf-rnnt-librispeech
-local_data_dir=$work_dir/local_data
-librispeech_download_dir=$local_data_dir/LibriSpeech
-stage=3
-
-mkdir -p $work_dir $local_data_dir $librispeech_download_dir
-
-install_dir=third_party/install
-mkdir -p $install_dir
-install_dir=$(readlink -f $install_dir)
-
-set +u
-source "$($CONDA_EXE info --base)/etc/profile.d/conda.sh"
-set -u
-
-# stage -1: install dependencies
-if [[ $stage -le -1 ]]; then
- conda env create --force --file environment.yml
-
- set +u
- source "$(conda info --base)/etc/profile.d/conda.sh"
- conda activate mlperf-rnnt
- set -u
-
- # We need to convert .flac files to .wav files via sox. Not all sox installs have flac support, so we install from source.
- wget https://ftp.osuosl.org/pub/xiph/releases/flac/flac-1.3.2.tar.xz -O third_party/flac-1.3.2.tar.xz
- (cd third_party; tar xf flac-1.3.2.tar.xz; cd flac-1.3.2; ./configure --prefix=$install_dir && make && make install)
-
- wget https://sourceforge.net/projects/sox/files/sox/14.4.2/sox-14.4.2.tar.gz -O third_party/sox-14.4.2.tar.gz
- (cd third_party; tar zxf sox-14.4.2.tar.gz; cd sox-14.4.2; LDFLAGS="-L${install_dir}/lib" CFLAGS="-I${install_dir}/include" ./configure --prefix=$install_dir --with-flac && make && make install)
-
- (cd $(git rev-parse --show-toplevel)/loadgen; python setup.py install)
-fi
-
-export PATH="$install_dir/bin/:$PATH"
-
-set +u
-conda activate mlperf-rnnt
-set -u
-
-# stage 0: download model. Check checksum to skip?
-if [[ $stage -le 0 ]]; then
- wget https://zenodo.org/record/3662521/files/DistributedDataParallel_1576581068.9962234-epoch-100.pt?download=1 -O $work_dir/rnnt.pt
-fi
-
-# stage 1: download data. This will have a non-zero exit code if the
-# checksum is incorrect.
-if [[ $stage -le 1 ]]; then
- python pytorch/utils/download_librispeech.py \
- pytorch/utils/librispeech-inference.csv \
- $librispeech_download_dir \
- -e $local_data_dir --skip_download
-fi
-
-if [[ $stage -le 2 ]]; then
- python pytorch/utils/convert_librispeech.py \
- --input_dir $librispeech_download_dir/dev-clean \
- --dest_dir $local_data_dir/dev-clean-wav \
- --output_json $local_data_dir/dev-clean-wav.json
-fi
-
-if [[ $stage -le 3 ]]; then
- for backend in pytorch; do
- for accuracy in ""; do
- for scenario in Offline; do
- log_dir=${work_dir}/${scenario}_${backend}
- if [ -n "${accuracy}" ]; then
- log_dir+=_accuracy
- fi
- log_dir+=rerun
-
- python run.py \
- --dataset_dir $local_data_dir \
- --manifest $local_data_dir/dev-clean-wav.json \
- --pytorch_config_toml pytorch/configs/rnnt.toml \
- --pytorch_checkpoint $work_dir/rnnt.pt \
- --scenario ${scenario} \
- --backend ${backend} \
- --log_dir ${log_dir} \
- ${accuracy} &
-
- done
- done
- done
- wait
-fi
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/run_benchmark.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/run_benchmark.sh
deleted file mode 100644
index b0c755ba11e..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/run_benchmark.sh
+++ /dev/null
@@ -1,101 +0,0 @@
-#!/bin/bash
-set -x
-
-export TCMALLOC_DIR=$CONDA_PREFIX/lib
-export KMP_BLOCKTIME=1
-# tcmalloc:
-#export LD_PRELOAD=$TCMALLOC_DIR/libtcmalloc.so
-
-# jemalloc
-export LD_PRELOAD=$TCMALLOC_DIR/libjemalloc.so:$TCMALLOC_DIR/libiomp5.so
-#export MALLOC_CONF="oversize_threshold:1,background_thread:true,metadata_thp:auto,dirty_decay_ms:9000000000,muzzy_decay_ms:9000000000"
-export MALLOC_CONF="background_thread:true,dirty_decay_ms:8000,muzzy_decay_ms:8000"
-
-PYTHON_VERSION=`python -c 'import sys; print ("{}.{}".format(sys.version_info.major, sys.version_info.minor))'`
-SITE_PACKAGES=`python -c 'import site; print (site.getsitepackages()[0])'`
-IPEX_VERSION=`conda list |grep torch-ipex | awk '{print $2}' `
-export LD_LIBRARY_PATH=$SITE_PACKAGES/torch_ipex-${IPEX_VERSION}-py$PYTHON_VERSION-linux-x86_64.egg/lib/:$LD_LIBRARY_PATH
-
-sockets=`lscpu | grep Socket | awk '{print $2}'`
-cores=`lscpu | grep Core.*per\ socket: | awk '{print $4}'`
-export DNNL_PRIMITIVE_CACHE_CAPACITY=10485760
-
-function main {
-
- init_params "$@"
- run_benchmark
-
-}
-
-# init params
-function init_params {
- tuned_checkpoint=saved_results
- scenario=Offline
- backend=pytorch
- for var in "$@"
- do
- case $var in
- --topology=*)
- topology=$(echo $var |cut -f2 -d=)
- ;;
- --dataset_location=*)
- dataset_location=$(echo $var |cut -f2 -d=)
- ;;
- --input_model=*)
- input_model=$(echo $var |cut -f2 -d=)
- ;;
- --mode=*)
- mode=$(echo $var |cut -f2 -d=)
- ;;
- --batch_size=*)
- batch_size=$(echo $var |cut -f2 -d=)
- ;;
- --iters=*)
- iters=$(echo ${var} |cut -f2 -d=)
- ;;
- --int8=*)
- int8=$(echo ${var} |cut -f2 -d=)
- ;;
- --config=*)
- tuned_checkpoint=$(echo $var |cut -f2 -d=)
- ;;
- *)
- echo "Error: No such parameter: ${var}"
- exit 1
- ;;
- esac
- done
-
-}
-
-
-# run_benchmark
-function run_benchmark {
- if [[ ${mode} == "accuracy" ]]; then
- mode_cmd=" --accuracy_only"
- elif [[ ${mode} == "benchmark" ]]; then
- mode_cmd=" --benchmark --user_conf user_benchmark.sh"
- else
- echo "Error: No such mode: ${mode}"
- exit 1
- fi
-
- extra_cmd=""
- if [[ ${int8} == "true" ]]; then
- extra_cmd=$extra_cmd" --int8 --bf16"
- fi
- echo $extra_cmd
-
- python run.py --dataset_dir ${dataset_location} \
- --manifest $dataset_location/dev-clean-wav.json \
- --pytorch_config_toml pytorch/configs/rnnt.toml \
- --pytorch_checkpoint $input_model \
- --scenario ${scenario} \
- --backend ${backend} \
- --log_dir output \
- --tuned_checkpoint $tuned_checkpoint \
- $mode_cmd \
- ${extra_cmd}
-}
-
-main "$@"
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/run_inference_cpu.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/run_inference_cpu.sh
deleted file mode 100644
index ca8fa2469ae..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/run_inference_cpu.sh
+++ /dev/null
@@ -1,69 +0,0 @@
-export TCMALLOC_DIR=$CONDA_PREFIX/lib
-export KMP_BLOCKTIME=1
-# tcmalloc:
-#export LD_PRELOAD=$TCMALLOC_DIR/libtcmalloc.so
-
-# jemalloc
-export LD_PRELOAD=$TCMALLOC_DIR/libjemalloc.so:$TCMALLOC_DIR/libiomp5.so
-#export MALLOC_CONF="oversize_threshold:1,background_thread:true,metadata_thp:auto,dirty_decay_ms:9000000000,muzzy_decay_ms:9000000000"
-export MALLOC_CONF="background_thread:true,dirty_decay_ms:8000,muzzy_decay_ms:8000"
-
-PYTHON_VERSION=`python -c 'import sys; print ("{}.{}".format(sys.version_info.major, sys.version_info.minor))'`
-SITE_PACKAGES=`python -c 'import site; print (site.getsitepackages()[0])'`
-IPEX_VERSION=`conda list |grep torch-ipex | awk '{print $2}' `
-export LD_LIBRARY_PATH=$SITE_PACKAGES/torch_ipex-${IPEX_VERSION}-py$PYTHON_VERSION-linux-x86_64.egg/lib/:$LD_LIBRARY_PATH
-
-sockets=`lscpu | grep Socket | awk '{print $2}'`
-cores=`lscpu | grep Core.*per\ socket: | awk '{print $4}'`
-
-root_dir=`pwd`
-work_dir=$root_dir/mlperf-rnnt-librispeech
-local_data_dir=$work_dir/local_data
-configure_path=calibration_result.json
-
-scenario=Offline
-machine_conf=offline.conf
-backend=pytorch
-for arg in "$@"; do
- case ${arg} in
- --accuracy) accuracy="--accuracy_only";;
- --debug) debug="--debug";;
- --profile*)
- if [[ $(echo ${arg} | cut -f2 -d=) == "--profile" ]];then
- profile="--profile True"
- else
- profile="--profile $(echo ${arg} | cut -f2 -d=)"
- fi;;
- --server)
- scenario=Server
- machine_conf=server.conf;;
- --verbose*) verbose="--verbose $(echo ${arg} | cut -f2 -d=)";;
- --warmup) warmup="--warmup";;
- *) echo "Error: No such parameter: ${arg}" exit 1;;
- esac
-done
-
-log_dir=${work_dir}/${scenario}_${backend}
-if [ -n "${accuracy:-}" ]; then
- log_dir+=_accuracy
-fi
-log_dir+=rerun
-
-export DNNL_PRIMITIVE_CACHE_CAPACITY=10485760
-
-python run.py --dataset_dir $local_data_dir \
- --manifest $local_data_dir/dev-clean-wav.json \
- --pytorch_config_toml pytorch/configs/rnnt.toml \
- --pytorch_checkpoint $work_dir/rnnt.pt \
- --scenario ${scenario} \
- --backend ${backend} \
- --log_dir output \
- --configure_path $configure_path \
- --machine_conf $machine_conf \
- ${accuracy} \
- ${warmup} \
- ${debug} \
- ${profile} \
- ${verbose} \
- --bf16 \
- --int8
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/run_tuning.sh b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/run_tuning.sh
deleted file mode 100644
index 0a03c6f2e3d..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/run_tuning.sh
+++ /dev/null
@@ -1,81 +0,0 @@
-export TCMALLOC_DIR=$CONDA_PREFIX/lib
-export KMP_BLOCKTIME=1
-# tcmalloc:
-#export LD_PRELOAD=$TCMALLOC_DIR/libtcmalloc.so
-
-# jemalloc
-export LD_PRELOAD=$TCMALLOC_DIR/libjemalloc.so:$TCMALLOC_DIR/libiomp5.so
-#export MALLOC_CONF="oversize_threshold:1,background_thread:true,metadata_thp:auto,dirty_decay_ms:9000000000,muzzy_decay_ms:9000000000"
-export MALLOC_CONF="background_thread:true,dirty_decay_ms:8000,muzzy_decay_ms:8000"
-
-PYTHON_VERSION=`python -c 'import sys; print ("{}.{}".format(sys.version_info.major, sys.version_info.minor))'`
-SITE_PACKAGES=`python -c 'import site; print (site.getsitepackages()[0])'`
-IPEX_VERSION=`conda list |grep torch-ipex | awk '{print $2}' `
-export LD_LIBRARY_PATH=$SITE_PACKAGES/torch_ipex-${IPEX_VERSION}-py$PYTHON_VERSION-linux-x86_64.egg/lib/:$LD_LIBRARY_PATH
-
-sockets=`lscpu | grep Socket | awk '{print $2}'`
-cores=`lscpu | grep Core.*per\ socket: | awk '{print $4}'`
-
-function main {
-
- init_params "$@"
- run_tuning
-
-}
-# init params
-function init_params {
- tuned_checkpoint=saved_results
- scenario=Offline
- backend=pytorch
- for var in "$@"
- do
- case $var in
- --topology=*)
- topology=$(echo $var |cut -f2 -d=)
- ;;
- --dataset_location=*)
- dataset_location=$(echo $var |cut -f2 -d=)
- ;;
- --input_model=*)
- input_model=$(echo $var |cut -f2 -d=)
- ;;
- --output_model=*)
- tuned_checkpoint=$(echo $var |cut -f2 -d=)
- ;;
- --debug) debug="--debug";;
- --profile*)
- if [[ $(echo ${var} | cut -f2 -d=) == "--profile" ]]; then
- profile="--profile True"
- else
- profile="--profile $(echo ${var} | cut -f2 -d=)"
- fi;;
- --server)
- scenario=Server
- ;;
- *)
- echo "Error: No such parameter: ${var}"
- exit 1
- ;;
- esac
- done
-}
-
-export DNNL_PRIMITIVE_CACHE_CAPACITY=10485760
-
-# run_tuning
-function run_tuning {
-
- python run.py --dataset_dir $dataset_location \
- --manifest $dataset_location/dev-clean-wav.json \
- --pytorch_config_toml pytorch/configs/rnnt.toml \
- --pytorch_checkpoint $input_model \
- --scenario ${scenario} \
- --backend ${backend} \
- --log_dir output \
- --tune \
- --tuned_checkpoint $tuned_checkpoint \
- --int8 \
- --bf16
-}
-
-main "$@"
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/user.conf b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/user.conf
deleted file mode 100644
index b1cc9d4fb24..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/user.conf
+++ /dev/null
@@ -1,3 +0,0 @@
-rnnt.Server.target_qps = 95
-#rnn.Server.min_query_count = 226170
-rnnt.Offline.target_qps = 10
diff --git a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/user_benchmark.conf b/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/user_benchmark.conf
deleted file mode 100644
index ae467b6f866..00000000000
--- a/examples/pytorch/speech_recognition/rnnt/quantization/ptq_static/ipex/user_benchmark.conf
+++ /dev/null
@@ -1,5 +0,0 @@
-rnnt.Server.target_qps = 95
-#rnn.Server.min_query_count = 226170
-# rnnt.Offline.target_qps = 10
-*.Offline.performance_issue_same = 1
-*.Offline.performance_issue_unique = 1
diff --git a/examples/tensorflow/image_recognition/resnet_v2/quantization/qat/README.md b/examples/tensorflow/image_recognition/resnet_v2/quantization/qat/README.md
index db54f8c3978..8dc19e23fcd 100644
--- a/examples/tensorflow/image_recognition/resnet_v2/quantization/qat/README.md
+++ b/examples/tensorflow/image_recognition/resnet_v2/quantization/qat/README.md
@@ -15,7 +15,10 @@ pip install neural-compressor
```shell
pip install intel-tensorflow==2.4.0
```
-
+### 3. Install tensorflow_model_optimization
+```shell
+pip install tensorflow_model_optimization
+```
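+
+As a quick sanity check that the package imports correctly, the snippet below wraps a small Keras model with quantization-aware training. It is only a minimal sketch of the standard `tfmot.quantization.keras.quantize_model` entry point; the toy model is a stand-in for illustration and is not the `resnet_v2.py` example itself.
+```python
+import tensorflow as tf
+import tensorflow_model_optimization as tfmot
+
+# Small stand-in Keras model (hypothetical; resnet_v2.py builds the real model).
+model = tf.keras.Sequential([
+    tf.keras.layers.Flatten(input_shape=(28, 28)),
+    tf.keras.layers.Dense(128, activation="relu"),
+    tf.keras.layers.Dense(10),
+])
+
+# Insert fake-quantization nodes so training simulates INT8 inference behavior.
+qat_model = tfmot.quantization.keras.quantize_model(model)
+
+qat_model.compile(
+    optimizer="adam",
+    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
+    metrics=["accuracy"],
+)
+qat_model.summary()  # quantize-wrapped layers should appear in the summary
+```
+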
## Run Command
```shell
python resnet_v2.py # to get the quantized ResNet-V2 model which will be saved into './trained_qat_model'.