clean up transformers scripts #17179

Merged
merged 8 commits on Aug 18, 2023
2 changes: 1 addition & 1 deletion onnxruntime/contrib_ops/cuda/bert/relative_attn_bias.cc
@@ -66,7 +66,7 @@ Status RelPosAttnBias<T>::ComputeInternal(OpKernelContext* context) const {
const int64_t key_len = *key_length->Data<int64_t>();

if (query_len != key_len) {
ORT_THROW("Relatvie position bias currently only support query length equal to key length in Self Attention.");
ORT_THROW("Relative position bias currently only support query length equal to key length in Self Attention.");
}

Tensor* output = context->Output(0, {1, num_heads, query_len, key_len});
10 changes: 1 addition & 9 deletions onnxruntime/python/tools/transformers/__init__.py
@@ -2,15 +2,7 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------

import os
import sys

- sys.path.append(os.path.join(os.path.dirname(__file__), "models", "gpt2"))

- import convert_to_onnx # noqa: E402, F401

- # added for backward compatible
- import gpt2_helper # noqa: E402, F401

- sys.path.append(os.path.join(os.path.dirname(__file__), "models", "t5"))
+ sys.path.append(os.path.dirname(__file__))
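With the eager gpt2/t5 imports removed, the package __init__ only extends sys.path, and sibling modules resolve on first use. A minimal standalone sketch of that pattern (the directory layout and benchmark_helper module mirror this tree, but the snippet is illustrative, not the shipped file):

import os
import sys

# Appending the package directory lets scripts use bare module names such as
# "from benchmark_helper import Precision" without a package prefix.
package_dir = os.path.dirname(os.path.abspath(__file__))
if package_dir not in sys.path:
    sys.path.append(package_dir)

import benchmark_helper  # noqa: E402  (resolved via the appended directory)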
8 changes: 2 additions & 6 deletions onnxruntime/python/tools/transformers/benchmark.py
@@ -45,12 +45,9 @@
import os
import timeit
from datetime import datetime
- from enum import Enum # noqa: F401

import numpy
- import onnx # noqa: F401
import psutil
- from benchmark_helper import allocateOutputBuffers # noqa: F401
from benchmark_helper import (
ConfigModifier,
OptimizerInfo,
@@ -65,6 +62,7 @@
setup_logger,
)
from fusion_options import FusionOptions
+ from huggingface_models import MODEL_CLASSES, MODELS
from onnx_exporter import (
create_onnxruntime_input,
export_onnx_model_from_pt,
@@ -76,16 +74,14 @@

logger = logging.getLogger("")

- from huggingface_models import MODEL_CLASSES, MODELS # noqa: E402

cpu_count = psutil.cpu_count(logical=False)

# Set OMP environment variable before importing onnxruntime or torch.
if "OMP_NUM_THREADS" not in os.environ:
os.environ["OMP_NUM_THREADS"] = str(cpu_count)

import torch # noqa: E402
- from transformers import AutoConfig, AutoModel, AutoTokenizer, GPT2Model, LxmertConfig # noqa: E402, F401
+ from transformers import AutoConfig, AutoTokenizer, LxmertConfig # noqa: E402


def run_onnxruntime(
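The import order in this hunk is deliberate: OpenMP reads OMP_NUM_THREADS when its runtime loads, so benchmark.py sets the variable before the first import of torch or onnxruntime. A standalone sketch of that pattern, using the same calls as the hunk:

import os

import psutil

# Physical cores only; hyperthreads rarely help OpenMP-bound inference.
cpu_count = psutil.cpu_count(logical=False)

# Must happen before torch/onnxruntime are imported, or the setting is ignored.
if "OMP_NUM_THREADS" not in os.environ:
    os.environ["OMP_NUM_THREADS"] = str(cpu_count)

import torch        # noqa: E402
import onnxruntime  # noqa: E402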
12 changes: 6 additions & 6 deletions onnxruntime/python/tools/transformers/bert_perf_test.py
@@ -3,11 +3,11 @@
# Licensed under the MIT License.
# --------------------------------------------------------------------------

- # This tool measures the inference performance of onnxruntime or onnxruntime-gpu python package on Bert model.

- # The input model shall have exactly three inputs. The model is either fully optimized (with EmbedLayerNormalization node),
- # or with reasonable input names (one input name has 'mask' substring, another has 'token' or 'segment' substring).
- # See get_bert_inputs function in bert_test_data.py for more information.
+ # This tool measures the inference performance of onnxruntime on BERT-like model with inputs like input_ids,
+ # token_type_ids (optional), and attention_mask (optional).
+ #
+ # If the model does not have exactly three inputs like above, you might need specify names of inputs with
+ # --input_ids_name, --segment_ids_name and --input_mask_name

# Example command to run test on batch_size 1 and 2 for a model on GPU:
# python bert_perf_test.py --model bert.onnx --batch_size 1 2 --sequence_length 128 --use_gpu --samples 1000 --test_times 1
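For a model whose three inputs are not auto-detected, the override flags named above can be passed explicitly. A hypothetical invocation (the model file and input names are illustrative):

# python bert_perf_test.py --model my_bert.onnx --batch_size 1 --sequence_length 128 \
#     --input_ids_name input_ids --segment_ids_name token_type_ids --input_mask_name attention_mask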
@@ -270,7 +270,7 @@ def run_one_test(model_setting, test_setting, perf_results, all_inputs, intra_op
results, latency_list = onnxruntime_inference(session, all_inputs, output_names)
all_latency_list.extend(latency_list)

- # latency in miliseconds
+ # latency in milliseconds
latency_ms = np.array(all_latency_list) * 1000

average_latency = statistics.mean(latency_ms)
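Beyond the mean computed above, a latency list like this is typically summarized with tail percentiles as well; a minimal sketch using the same variable names (the sample values are illustrative):

import statistics

import numpy as np

all_latency_list = [0.0119, 0.0123, 0.0131, 0.0502]  # seconds, illustrative

# latency in milliseconds
latency_ms = np.array(all_latency_list) * 1000

average_latency = statistics.mean(latency_ms)
p90_latency = np.percentile(latency_ms, 90)  # tail latency matters for serving
print(f"mean={average_latency:.2f} ms, p90={p90_latency:.2f} ms")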
8 changes: 7 additions & 1 deletion onnxruntime/python/tools/transformers/bert_test_data.py
@@ -584,7 +584,12 @@ def create_and_save_test_data(

import onnxruntime

- session = onnxruntime.InferenceSession(model)
+ providers = (
+     ["CUDAExecutionProvider", "CPUExecutionProvider"]
+     if "CUDAExecutionProvider" in onnxruntime.get_available_providers()
+     else ["CPUExecutionProvider"]
+ )
+ session = onnxruntime.InferenceSession(model, providers=providers)
output_names = [output.name for output in session.get_outputs()]

for i, inputs in enumerate(all_inputs):
@@ -629,6 +634,7 @@ def main():
args.only_input_tensors,
args.average_sequence_length,
args.random_sequence_length,
+ args.mask_type,
)

print("Test data is saved to directory:", output_dir)
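Recent onnxruntime releases expect an explicit providers list when constructing an InferenceSession, which is what the hunk above adds. The same fallback pattern as a standalone sketch (the helper name and model path are illustrative):

import onnxruntime


def pick_providers():
    # Prefer CUDA when the installed build exposes it; otherwise CPU only.
    available = onnxruntime.get_available_providers()
    if "CUDAExecutionProvider" in available:
        return ["CUDAExecutionProvider", "CPUExecutionProvider"]
    return ["CPUExecutionProvider"]


session = onnxruntime.InferenceSession("model.onnx", providers=pick_providers())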
18 changes: 6 additions & 12 deletions onnxruntime/python/tools/transformers/convert_generation.py
@@ -45,7 +45,6 @@
import logging
import math
import os
- import sys
import time
from enum import Enum
from pathlib import Path
@@ -54,9 +53,14 @@
import numpy as np
import onnx
import torch
- from benchmark_helper import Precision
+ from benchmark_helper import Precision, setup_logger
from fusion_utils import NumpyHelper
+ from models.gpt2.convert_to_onnx import main as convert_gpt2_to_onnx
+ from models.gpt2.gpt2_helper import PRETRAINED_GPT2_MODELS
+ from models.t5.convert_to_onnx import export_onnx_models as export_t5_onnx_models
+ from models.t5.t5_helper import PRETRAINED_MT5_MODELS, PRETRAINED_T5_MODELS
from onnx import GraphProto, ModelProto, TensorProto
+ from onnx_model import OnnxModel
from transformers import (
GPT2Config,
GPT2LMHeadModel,
@@ -70,16 +74,6 @@

from onnxruntime import GraphOptimizationLevel, InferenceSession, SessionOptions, get_available_providers

- sys.path.append(os.path.join(os.path.dirname(__file__), "models", "gpt2"))
- from gpt2_helper import PRETRAINED_GPT2_MODELS # noqa: E402
- from models.gpt2.convert_to_onnx import main as convert_gpt2_to_onnx # noqa: E402

- sys.path.append(os.path.join(os.path.dirname(__file__), "models", "t5"))
- from benchmark_helper import setup_logger # noqa: E402
- from models.t5.convert_to_onnx import export_onnx_models as export_t5_onnx_models # noqa: E402
- from models.t5.t5_helper import PRETRAINED_MT5_MODELS, PRETRAINED_T5_MODELS # noqa: E402
- from onnx_model import OnnxModel # noqa: E402

logger = logging.getLogger("")


@@ -3,8 +3,6 @@
# Licensed under the MIT License.
# --------------------------------------------------------------------------

- from logging import getLogger # noqa: F401

from fusion_base import Fusion
from onnx import helper
from onnx_model import OnnxModel
@@ -7,7 +7,7 @@
import numpy as np
from fusion_base import Fusion
from fusion_utils import FusionUtils
- from onnx import TensorProto, helper, numpy_helper # noqa: F401
+ from onnx import helper
from onnx_model import OnnxModel

logger = getLogger(__name__)
@@ -5,10 +5,8 @@
from logging import getLogger

import numpy as np
- from fusion_base import Fusion # noqa: F401
from fusion_gpt_attention import FusionGptAttentionPastBase
- from fusion_utils import FusionUtils # noqa: F401
- from onnx import TensorProto, helper, numpy_helper # noqa: F401
+ from onnx import helper
from onnx_model import OnnxModel

logger = getLogger(__name__)
@@ -4,10 +4,8 @@
# --------------------------------------------------------------------------
from logging import getLogger

- import numpy as np # noqa: F401
from fusion_base import Fusion
- from fusion_utils import FusionUtils # noqa: F401
- from onnx import TensorProto, helper, numpy_helper # noqa: F401
+ from onnx import helper
from onnx_model import OnnxModel

logger = getLogger(__name__)
3 changes: 1 addition & 2 deletions onnxruntime/python/tools/transformers/machine_info.py
@@ -9,9 +9,8 @@
import json
import logging
import platform
- import sys # noqa: F401
from os import environ
- from typing import Dict, List, Tuple, Union # noqa: F401
+ from typing import Dict, List

import cpuinfo
import psutil
12 changes: 12 additions & 0 deletions onnxruntime/python/tools/transformers/models/bart/__init__.py
@@ -0,0 +1,12 @@
+ # -------------------------------------------------------------------------
+ # Copyright (c) Microsoft Corporation. All rights reserved.
+ # Licensed under the MIT License.
+ # --------------------------------------------------------------------------
+ import os.path
+ import sys
+
+ sys.path.append(os.path.dirname(__file__))
+
+ transformers_dir = os.path.normpath(os.path.join(os.path.dirname(__file__), "..", ".."))
+ if transformers_dir not in sys.path:
+     sys.path.append(transformers_dir)
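The normpath plus membership check keeps sys.path free of duplicate entries when several of these model packages are imported in one process. The same idempotent-append pattern as a reusable sketch (the function name is illustrative):

import os
import sys


def add_to_sys_path(path):
    # Normalize first so ".." segments cannot defeat the duplicate check.
    path = os.path.normpath(path)
    if path not in sys.path:
        sys.path.append(path)


add_to_sys_path(os.path.join(os.path.dirname(__file__), "..", ".."))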
8 changes: 8 additions & 0 deletions onnxruntime/python/tools/transformers/models/bert/__init__.py
@@ -2,3 +2,11 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
+ import os.path
+ import sys
+
+ sys.path.append(os.path.dirname(__file__))
+
+ transformers_dir = os.path.normpath(os.path.join(os.path.dirname(__file__), "..", ".."))
+ if transformers_dir not in sys.path:
+     sys.path.append(transformers_dir)
8 changes: 8 additions & 0 deletions onnxruntime/python/tools/transformers/models/gpt2/__init__.py
@@ -2,3 +2,11 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
+ import os.path
+ import sys
+
+ sys.path.append(os.path.dirname(__file__))
+
+ transformers_dir = os.path.normpath(os.path.join(os.path.dirname(__file__), "..", ".."))
+ if transformers_dir not in sys.path:
+     sys.path.append(transformers_dir)
@@ -10,25 +10,22 @@
import csv
import logging
import os
- import sys
from datetime import datetime

import psutil
import torch
- from gpt2_helper import DEFAULT_TOLERANCE, MODEL_CLASSES, PRETRAINED_GPT2_MODELS, Gpt2Helper
- from packaging import version
- from transformers import AutoConfig

- sys.path.append(os.path.join(os.path.dirname(__file__), "..", ".."))

- from benchmark_helper import ( # noqa: E402
+ from benchmark_helper import (
Precision,
create_onnxruntime_session,
get_ort_environment_variables,
prepare_environment,
setup_logger,
)
- from quantize_helper import QuantizeHelper # noqa: E402
+ from gpt2_helper import DEFAULT_TOLERANCE, MODEL_CLASSES, PRETRAINED_GPT2_MODELS, Gpt2Helper
+ from packaging import version
+ from quantize_helper import QuantizeHelper
+ from transformers import AutoConfig
+ from transformers import __version__ as transformers_version

logger = logging.getLogger("")

@@ -169,8 +166,6 @@ def parse_arguments(argv=None):


def main(args):
- from transformers import __version__ as transformers_version
if version.parse(transformers_version) < version.parse(
"3.1.0"
): # past_key_values name does not exist in 3.0.2 or older
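The transformers version import hoisted in this hunk feeds the version.parse guard at the end; packaging.version compares release segments numerically, which plain string comparison gets wrong. A small self-contained illustration:

from packaging import version

# Lexicographic comparison wrongly calls "3.10.0" older than "3.9.0".
assert "3.10.0" < "3.9.0"
assert version.parse("3.10.0") > version.parse("3.9.0")

transformers_version = "3.0.2"  # illustrative value
if version.parse(transformers_version) < version.parse("3.1.0"):
    # past_key_values does not exist in transformers 3.0.2 or older.
    print("transformers >= 3.1.0 required")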
@@ -15,29 +15,31 @@
"""

import argparse
+ import csv
import json
import logging
import os
+ import shutil
import sys
from pathlib import Path

import numpy
import torch
- from gpt2_helper import DEFAULT_TOLERANCE, MODEL_CLASSES, PRETRAINED_GPT2_MODELS, Gpt2Helper
- from gpt2_tester import Gpt2Tester
- from packaging import version
- from transformers import AutoConfig

- sys.path.append(os.path.join(os.path.dirname(__file__), "..", ".."))

- from benchmark_helper import ( # noqa: E402
+ from benchmark_helper import (
Precision,
create_onnxruntime_session,
get_ort_environment_variables,
prepare_environment,
setup_logger,
)
- from quantize_helper import QuantizeHelper # noqa: E402
+ from gpt2_helper import DEFAULT_TOLERANCE, MODEL_CLASSES, PRETRAINED_GPT2_MODELS, Gpt2Helper
+ from gpt2_tester import Gpt2Tester
+ from packaging import version
+ from quantize_helper import QuantizeHelper
+ from transformers import AutoConfig
+ from transformers import __version__ as transformers_version

+ from onnxruntime import __version__ as ort_version

logger = logging.getLogger("")

@@ -242,8 +244,6 @@ def get_latency_name(batch_size, sequence_length, past_sequence_length):

def main(argv=None, experiment_name: str = "", run_id: str = "0", csv_filename: str = "gpt2_parity_results.csv"):
result = {}
- from transformers import __version__ as transformers_version
if version.parse(transformers_version) < version.parse(
"3.1.0"
): # past_key_values name does not exist in 3.0.2 or older
@@ -253,8 +253,6 @@ def main(argv=None, experiment_name: str = "", run_id: str = "0", csv_filename:
setup_logger(args.verbose)

if not experiment_name:
- import sys
experiment_name = " ".join(argv if argv else sys.argv[1:])

if args.tolerance == 0:
@@ -366,8 +364,6 @@ def main(argv=None, experiment_name: str = "", run_id: str = "0", csv_filename:
output_path = onnx_model_paths["int8"]

if args.output.endswith(".onnx") and output_path != args.output and not args.use_external_data_format:
- import shutil

shutil.move(output_path, args.output)
output_path = args.output

@@ -424,10 +420,6 @@
logger.info(f"fp16 conversion parameters:{fp16_params}")

# Write results to file
- import csv

- from onnxruntime import __version__ as ort_version

latency_name = get_latency_name(batch_size, sequence_length, past_sequence_length)
csv_file_existed = os.path.exists(csv_filename)
with open(csv_filename, mode="a", newline="") as csv_file:
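The final hunk checks for the results CSV before opening it in append mode, so the header row is written only once across runs. A minimal sketch of that pattern (column names are illustrative):

import csv
import os

csv_filename = "gpt2_parity_results.csv"
row = {"run_id": "0", "experiment": "baseline", "average_latency_ms": 12.3}

csv_file_existed = os.path.exists(csv_filename)
with open(csv_filename, mode="a", newline="") as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=list(row.keys()))
    if not csv_file_existed:
        writer.writeheader()  # header only when the file is first created
    writer.writerow(row)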