clean up transformers scripts (#17179)
(1) Remove class BertOptimizationOptions, which was deprecated a long time ago
(2) Move sys path settings to `__init__.py`, and update imports
(3) Fix bert_perf_test to run properly.
(4) Fix an ONNX path in a Whisper test case
(5) Fix a few typos
(6) Update comments in bert_perf_test regarding graph inputs
tianleiwu authored Aug 18, 2023
1 parent 78b3565 commit d65aa54
Showing 51 changed files with 182 additions and 251 deletions.
2 changes: 1 addition & 1 deletion onnxruntime/contrib_ops/cuda/bert/relative_attn_bias.cc
@@ -66,7 +66,7 @@ Status RelPosAttnBias<T>::ComputeInternal(OpKernelContext* context) const {
 const int64_t key_len = *key_length->Data<int64_t>();

 if (query_len != key_len) {
-  ORT_THROW("Relatvie position bias currently only support query length equal to key length in Self Attention.");
+  ORT_THROW("Relative position bias currently only support query length equal to key length in Self Attention.");
 }

 Tensor* output = context->Output(0, {1, num_heads, query_len, key_len});
10 changes: 1 addition & 9 deletions onnxruntime/python/tools/transformers/__init__.py
@@ -2,15 +2,7 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
 # --------------------------------------------------------------------------
-
 import os
 import sys

-sys.path.append(os.path.join(os.path.dirname(__file__), "models", "gpt2"))
-
-import convert_to_onnx  # noqa: E402, F401
-
-# added for backward compatible
-import gpt2_helper  # noqa: E402, F401
-
-sys.path.append(os.path.join(os.path.dirname(__file__), "models", "t5"))
+sys.path.append(os.path.dirname(__file__))
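The effect of the new one-liner: importing the package puts the transformers tools directory itself on `sys.path`, so the flat imports used throughout these scripts resolve without per-file path hacks. A minimal sketch of the idea (the directory path below is illustrative):

import sys

# Illustrative stand-in for the path __init__.py derives from __file__:
transformers_dir = "/path/to/onnxruntime/python/tools/transformers"
sys.path.append(transformers_dir)

# Flat imports used throughout these scripts, e.g. `import benchmark_helper`
# or `import onnx_model`, now resolve against that directory.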
8 changes: 2 additions & 6 deletions onnxruntime/python/tools/transformers/benchmark.py
@@ -45,12 +45,9 @@
 import os
 import timeit
 from datetime import datetime
-from enum import Enum  # noqa: F401

 import numpy
-import onnx  # noqa: F401
 import psutil
-from benchmark_helper import allocateOutputBuffers  # noqa: F401
 from benchmark_helper import (
     ConfigModifier,
     OptimizerInfo,
@@ -65,6 +62,7 @@
     setup_logger,
 )
 from fusion_options import FusionOptions
+from huggingface_models import MODEL_CLASSES, MODELS
 from onnx_exporter import (
     create_onnxruntime_input,
     export_onnx_model_from_pt,
@@ -76,16 +74,14 @@

 logger = logging.getLogger("")

-from huggingface_models import MODEL_CLASSES, MODELS  # noqa: E402
-
 cpu_count = psutil.cpu_count(logical=False)

 # Set OMP environment variable before importing onnxruntime or torch.
 if "OMP_NUM_THREADS" not in os.environ:
     os.environ["OMP_NUM_THREADS"] = str(cpu_count)

 import torch  # noqa: E402
-from transformers import AutoConfig, AutoModel, AutoTokenizer, GPT2Model, LxmertConfig  # noqa: E402, F401
+from transformers import AutoConfig, AutoTokenizer, LxmertConfig  # noqa: E402


 def run_onnxruntime(
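The hunk above also shows why `import torch` is deferred: OMP_NUM_THREADS only takes effect if it is set before the libraries that read it are imported. A minimal standalone sketch of the same pattern:

import os

import psutil

# Pin OpenMP threads to the physical core count unless the user already set it.
if "OMP_NUM_THREADS" not in os.environ:
    os.environ["OMP_NUM_THREADS"] = str(psutil.cpu_count(logical=False))

# Import the libraries that consult OMP_NUM_THREADS only after it is set.
import torch  # noqa: E402
import onnxruntime  # noqa: E402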
12 changes: 6 additions & 6 deletions onnxruntime/python/tools/transformers/bert_perf_test.py
@@ -3,11 +3,11 @@
 # Licensed under the MIT License.
 # --------------------------------------------------------------------------

-# This tool measures the inference performance of onnxruntime or onnxruntime-gpu python package on Bert model.
-
-# The input model shall have exactly three inputs. The model is either fully optimized (with EmbedLayerNormalization node),
-# or with reasonable input names (one input name has 'mask' substring, another has 'token' or 'segment' substring).
-# See get_bert_inputs function in bert_test_data.py for more information.
+# This tool measures the inference performance of onnxruntime on a BERT-like model with inputs like input_ids,
+# token_type_ids (optional), and attention_mask (optional).
+#
+# If the model does not have exactly three inputs like above, you might need to specify names of inputs with
+# --input_ids_name, --segment_ids_name and --input_mask_name

 # Example command to run test on batch_size 1 and 2 for a model on GPU:
 # python bert_perf_test.py --model bert.onnx --batch_size 1 2 --sequence_length 128 --use_gpu --samples 1000 --test_times 1
@@ -270,7 +270,7 @@ def run_one_test(model_setting, test_setting, perf_results, all_inputs, intra_op
     results, latency_list = onnxruntime_inference(session, all_inputs, output_names)
     all_latency_list.extend(latency_list)

-    # latency in miliseconds
+    # latency in milliseconds
     latency_ms = np.array(all_latency_list) * 1000

     average_latency = statistics.mean(latency_ms)
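For readers unfamiliar with the tool, a minimal sketch of the measurement it performs; the model path and input names below are hypothetical (real names come from the model itself or from --input_ids_name, --segment_ids_name and --input_mask_name):

import time

import numpy as np
import onnxruntime

batch_size, sequence_length = 1, 128
session = onnxruntime.InferenceSession("bert.onnx", providers=["CPUExecutionProvider"])
inputs = {
    "input_ids": np.ones((batch_size, sequence_length), dtype=np.int64),
    "token_type_ids": np.zeros((batch_size, sequence_length), dtype=np.int64),
    "attention_mask": np.ones((batch_size, sequence_length), dtype=np.int64),
}

start = time.time()
session.run(None, inputs)
latency_ms = (time.time() - start) * 1000  # latency in milliseconds
print(f"latency: {latency_ms:.2f} ms")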
8 changes: 7 additions & 1 deletion onnxruntime/python/tools/transformers/bert_test_data.py
@@ -584,7 +584,12 @@ def create_and_save_test_data(

     import onnxruntime

-    session = onnxruntime.InferenceSession(model)
+    providers = (
+        ["CUDAExecutionProvider", "CPUExecutionProvider"]
+        if "CUDAExecutionProvider" in onnxruntime.get_available_providers()
+        else ["CPUExecutionProvider"]
+    )
+    session = onnxruntime.InferenceSession(model, providers=providers)
     output_names = [output.name for output in session.get_outputs()]

     for i, inputs in enumerate(all_inputs):
@@ -629,6 +634,7 @@ def main():
         args.only_input_tensors,
         args.average_sequence_length,
         args.random_sequence_length,
+        args.mask_type,
     )

     print("Test data is saved to directory:", output_dir)
18 changes: 6 additions & 12 deletions onnxruntime/python/tools/transformers/convert_generation.py
@@ -45,7 +45,6 @@
 import logging
 import math
 import os
-import sys
 import time
 from enum import Enum
 from pathlib import Path
@@ -54,9 +53,14 @@
 import numpy as np
 import onnx
 import torch
-from benchmark_helper import Precision
+from benchmark_helper import Precision, setup_logger
 from fusion_utils import NumpyHelper
+from models.gpt2.convert_to_onnx import main as convert_gpt2_to_onnx
+from models.gpt2.gpt2_helper import PRETRAINED_GPT2_MODELS
+from models.t5.convert_to_onnx import export_onnx_models as export_t5_onnx_models
+from models.t5.t5_helper import PRETRAINED_MT5_MODELS, PRETRAINED_T5_MODELS
 from onnx import GraphProto, ModelProto, TensorProto
+from onnx_model import OnnxModel
 from transformers import (
     GPT2Config,
     GPT2LMHeadModel,
@@ -70,16 +74,6 @@

 from onnxruntime import GraphOptimizationLevel, InferenceSession, SessionOptions, get_available_providers

-sys.path.append(os.path.join(os.path.dirname(__file__), "models", "gpt2"))
-from gpt2_helper import PRETRAINED_GPT2_MODELS  # noqa: E402
-from models.gpt2.convert_to_onnx import main as convert_gpt2_to_onnx  # noqa: E402
-
-sys.path.append(os.path.join(os.path.dirname(__file__), "models", "t5"))
-from benchmark_helper import setup_logger  # noqa: E402
-from models.t5.convert_to_onnx import export_onnx_models as export_t5_onnx_models  # noqa: E402
-from models.t5.t5_helper import PRETRAINED_MT5_MODELS, PRETRAINED_T5_MODELS  # noqa: E402
-from onnx_model import OnnxModel  # noqa: E402
-
 logger = logging.getLogger("")


@@ -3,8 +3,6 @@
 # Licensed under the MIT License.
 # --------------------------------------------------------------------------

-from logging import getLogger  # noqa: F401
-
 from fusion_base import Fusion
 from onnx import helper
 from onnx_model import OnnxModel
@@ -7,7 +7,7 @@
 import numpy as np
 from fusion_base import Fusion
 from fusion_utils import FusionUtils
-from onnx import TensorProto, helper, numpy_helper  # noqa: F401
+from onnx import helper
 from onnx_model import OnnxModel

 logger = getLogger(__name__)
@@ -5,10 +5,8 @@
 from logging import getLogger

 import numpy as np
-from fusion_base import Fusion  # noqa: F401
 from fusion_gpt_attention import FusionGptAttentionPastBase
-from fusion_utils import FusionUtils  # noqa: F401
-from onnx import TensorProto, helper, numpy_helper  # noqa: F401
+from onnx import helper
 from onnx_model import OnnxModel

 logger = getLogger(__name__)
@@ -4,10 +4,8 @@
 # --------------------------------------------------------------------------
 from logging import getLogger

-import numpy as np  # noqa: F401
 from fusion_base import Fusion
-from fusion_utils import FusionUtils  # noqa: F401
-from onnx import TensorProto, helper, numpy_helper  # noqa: F401
+from onnx import helper
 from onnx_model import OnnxModel

 logger = getLogger(__name__)
3 changes: 1 addition & 2 deletions onnxruntime/python/tools/transformers/machine_info.py
@@ -9,9 +9,8 @@
 import json
 import logging
 import platform
-import sys  # noqa: F401
 from os import environ
-from typing import Dict, List, Tuple, Union  # noqa: F401
+from typing import Dict, List

 import cpuinfo
 import psutil
12 changes: 12 additions & 0 deletions onnxruntime/python/tools/transformers/models/bart/__init__.py
@@ -0,0 +1,12 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+import os.path
+import sys
+
+sys.path.append(os.path.dirname(__file__))
+
+transformers_dir = os.path.normpath(os.path.join(os.path.dirname(__file__), "..", ".."))
+if transformers_dir not in sys.path:
+    sys.path.append(transformers_dir)
8 changes: 8 additions & 0 deletions onnxruntime/python/tools/transformers/models/bert/__init__.py
@@ -2,3 +2,11 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
 # --------------------------------------------------------------------------
+import os.path
+import sys
+
+sys.path.append(os.path.dirname(__file__))
+
+transformers_dir = os.path.normpath(os.path.join(os.path.dirname(__file__), "..", ".."))
+if transformers_dir not in sys.path:
+    sys.path.append(transformers_dir)
8 changes: 8 additions & 0 deletions onnxruntime/python/tools/transformers/models/gpt2/__init__.py
@@ -2,3 +2,11 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
 # --------------------------------------------------------------------------
+import os.path
+import sys
+
+sys.path.append(os.path.dirname(__file__))
+
+transformers_dir = os.path.normpath(os.path.join(os.path.dirname(__file__), "..", ".."))
+if transformers_dir not in sys.path:
+    sys.path.append(transformers_dir)
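The same eight-line bootstrap is added to the bart, bert, and gpt2 subpackages above. Its intent, restated as an annotated sketch (both paths are computed from each package's own `__file__`):

import os.path
import sys

# Make sibling modules in this model folder importable by flat name...
sys.path.append(os.path.dirname(__file__))

# ...and make the shared transformers tools directory two levels up importable
# too, without appending it twice when several model packages are imported.
transformers_dir = os.path.normpath(os.path.join(os.path.dirname(__file__), "..", ".."))
if transformers_dir not in sys.path:
    sys.path.append(transformers_dir)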
@@ -10,25 +10,22 @@
 import csv
 import logging
 import os
-import sys
 from datetime import datetime

 import psutil
 import torch
-from gpt2_helper import DEFAULT_TOLERANCE, MODEL_CLASSES, PRETRAINED_GPT2_MODELS, Gpt2Helper
-from packaging import version
-from transformers import AutoConfig
-
-sys.path.append(os.path.join(os.path.dirname(__file__), "..", ".."))
-
-from benchmark_helper import (  # noqa: E402
+from benchmark_helper import (
     Precision,
     create_onnxruntime_session,
     get_ort_environment_variables,
     prepare_environment,
     setup_logger,
 )
-from quantize_helper import QuantizeHelper  # noqa: E402
+from gpt2_helper import DEFAULT_TOLERANCE, MODEL_CLASSES, PRETRAINED_GPT2_MODELS, Gpt2Helper
+from packaging import version
+from quantize_helper import QuantizeHelper
+from transformers import AutoConfig
+from transformers import __version__ as transformers_version

 logger = logging.getLogger("")

@@ -169,8 +166,6 @@ def parse_arguments(argv=None):


 def main(args):
-    from transformers import __version__ as transformers_version
-
     if version.parse(transformers_version) < version.parse(
         "3.1.0"
     ):  # past_key_values name does not exist in 3.0.2 or older
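The check above gates on the installed transformers release, since the past_key_values name first appeared in 3.1.0. What the script does on older versions is elided here; a minimal sketch of the same guard, assuming the simplest response is to abort:

from packaging import version
from transformers import __version__ as transformers_version

# past_key_values does not exist in transformers 3.0.2 or older.
if version.parse(transformers_version) < version.parse("3.1.0"):
    raise RuntimeError(f"transformers >= 3.1.0 required, found {transformers_version}")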
@@ -15,29 +15,31 @@
"""

import argparse
import csv
import json
import logging
import os
import shutil
import sys
from pathlib import Path

import numpy
import torch
from gpt2_helper import DEFAULT_TOLERANCE, MODEL_CLASSES, PRETRAINED_GPT2_MODELS, Gpt2Helper
from gpt2_tester import Gpt2Tester
from packaging import version
from transformers import AutoConfig

sys.path.append(os.path.join(os.path.dirname(__file__), "..", ".."))

from benchmark_helper import ( # noqa: E402
from benchmark_helper import (
Precision,
create_onnxruntime_session,
get_ort_environment_variables,
prepare_environment,
setup_logger,
)
from quantize_helper import QuantizeHelper # noqa: E402
from gpt2_helper import DEFAULT_TOLERANCE, MODEL_CLASSES, PRETRAINED_GPT2_MODELS, Gpt2Helper
from gpt2_tester import Gpt2Tester
from packaging import version
from quantize_helper import QuantizeHelper
from transformers import AutoConfig
from transformers import __version__ as transformers_version

from onnxruntime import __version__ as ort_version

logger = logging.getLogger("")

@@ -242,8 +244,6 @@ def get_latency_name(batch_size, sequence_length, past_sequence_length):

 def main(argv=None, experiment_name: str = "", run_id: str = "0", csv_filename: str = "gpt2_parity_results.csv"):
     result = {}
-    from transformers import __version__ as transformers_version
-
     if version.parse(transformers_version) < version.parse(
         "3.1.0"
     ):  # past_key_values name does not exist in 3.0.2 or older
@@ -253,8 +253,6 @@ def main(argv=None, experiment_name: str = "", run_id: str = "0", csv_filename:
     setup_logger(args.verbose)

     if not experiment_name:
-        import sys
-
         experiment_name = " ".join(argv if argv else sys.argv[1:])

     if args.tolerance == 0:
@@ -366,8 +364,6 @@ def main(argv=None, experiment_name: str = "", run_id: str = "0", csv_filename:
         output_path = onnx_model_paths["int8"]

     if args.output.endswith(".onnx") and output_path != args.output and not args.use_external_data_format:
-        import shutil
-
         shutil.move(output_path, args.output)
         output_path = args.output

@@ -424,10 +420,6 @@ def main(argv=None, experiment_name: str = "", run_id: str = "0", csv_filename:
     logger.info(f"fp16 conversion parameters:{fp16_params}")

     # Write results to file
-    import csv
-
-    from onnxruntime import __version__ as ort_version
-
     latency_name = get_latency_name(batch_size, sequence_length, past_sequence_length)
     csv_file_existed = os.path.exists(csv_filename)
     with open(csv_filename, mode="a", newline="") as csv_file:
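The hunk above keeps the append-mode CSV pattern: `csv_file_existed` records whether the file already exists, so the header is written only once. A minimal sketch of that pattern with illustrative column names:

import csv
import os


def append_result(csv_filename: str, row: dict) -> None:
    # Write the header only when creating the file; later runs just append rows.
    csv_file_existed = os.path.exists(csv_filename)
    with open(csv_filename, mode="a", newline="") as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=list(row.keys()))
        if not csv_file_existed:
            writer.writeheader()
        writer.writerow(row)


append_result("gpt2_parity_results.csv", {"run_id": "0", "latency_ms": 12.3})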