Support specifying an execution provider in benchmark script #10453

Merged (6 commits) on Feb 3, 2022
16 changes: 12 additions & 4 deletions onnxruntime/python/tools/transformers/benchmark.py
@@ -68,13 +68,14 @@
from transformers import (AutoConfig, AutoTokenizer, AutoModel, GPT2Model, LxmertConfig)


- def run_onnxruntime(use_gpu, model_names, model_class, precision, num_threads, batch_sizes, sequence_lengths,
+ def run_onnxruntime(use_gpu, provider, model_names, model_class, precision, num_threads, batch_sizes, sequence_lengths,
repeat_times, input_counts, optimize_onnx, validate_onnx, cache_dir, onnx_dir, verbose, overwrite,
disable_ort_io_binding, use_raw_attention_mask, model_fusion_statistics, model_source):
import onnxruntime

results = []
- if use_gpu and ('CUDAExecutionProvider' not in onnxruntime.get_available_providers()):
+ if (use_gpu and ('CUDAExecutionProvider' not in onnxruntime.get_available_providers()) and
+         ('ROCMExecutionProvider' not in onnxruntime.get_available_providers())):
logger.error(
"Please install onnxruntime-gpu package instead of onnxruntime, and use a machine with GPU for testing gpu performance."
)
@@ -105,6 +106,7 @@ def run_onnxruntime(use_gpu, model_names, model_class, precision, num_threads, b

ort_session = create_onnxruntime_session(onnx_model_file,
use_gpu,
+ provider,
enable_all_optimization=True,
num_threads=num_threads,
verbose=verbose)
@@ -425,7 +427,13 @@ def parse_arguments():
default=os.path.join('.', 'onnx_models'),
help="Directory to store onnx models")

parser.add_argument("-g", "--use_gpu", required=False, action="store_true", help="Run on cuda device")
parser.add_argument("-g", "--use_gpu", required=False, action="store_true", help="Run on gpu device")

parser.add_argument("--provider",
required=False,
type=str,
default=None,
help="Execution provider to use")

parser.add_argument(
"-p",
@@ -545,7 +553,7 @@ def main():
if enable_onnxruntime:
try:
use_raw_attention_mask = True
- results += run_onnxruntime(args.use_gpu, args.models, args.model_class, args.precision, num_threads,
+ results += run_onnxruntime(args.use_gpu, args.provider, args.models, args.model_class, args.precision, num_threads,
args.batch_sizes, args.sequence_lengths, args.test_times, args.input_counts,
args.optimize_onnx, args.validate_onnx, args.cache_dir, args.onnx_dir,
args.verbose, args.overwrite, args.disable_ort_io_binding,
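With this change the benchmark script takes an explicit execution provider alongside the existing GPU switch. A minimal usage sketch; only -g/--use_gpu and --provider come from this PR, the model flag and other values are assumptions based on the existing script:

import subprocess

# Run the transformer benchmark on GPU, explicitly selecting the ROCm provider.
# Assumes the command is run from onnxruntime/python/tools/transformers.
subprocess.run(["python", "benchmark.py",
                "-g",                        # --use_gpu
                "--provider", "rocm",        # new flag added here; defaults to None
                "-m", "bert-base-cased"],    # model selection (illustrative)
               check=True)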
18 changes: 13 additions & 5 deletions onnxruntime/python/tools/transformers/benchmark_helper.py
@@ -39,11 +39,11 @@ def __str__(self):

def create_onnxruntime_session(onnx_model_path,
use_gpu,
+ provider=None,
enable_all_optimization=True,
num_threads=-1,
enable_profiling=False,
- verbose=False,
- use_dml=False):
+ verbose=False):
session = None
try:
from onnxruntime import SessionOptions, InferenceSession, GraphOptimizationLevel, __version__ as onnxruntime_version
@@ -68,8 +68,16 @@ def create_onnxruntime_session(onnx_model_path,

logger.debug(f"Create session for onnx model: {onnx_model_path}")
if use_gpu:
- if use_dml:
+ if provider == 'dml':
execution_providers = ['DmlExecutionProvider', 'CPUExecutionProvider']
+ elif provider == 'rocm':
+     execution_providers = ['ROCMExecutionProvider', 'CPUExecutionProvider']
+ elif provider == 'migraphx':
+     execution_providers = ['MIGraphXExecutionProvider', 'ROCMExecutionProvider', 'CPUExecutionProvider']
+ elif provider == 'cuda':
+     execution_providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
+ elif provider == 'tensorrt':
+     execution_providers = ['TensorrtExecutionProvider', 'CUDAExecutionProvider', 'CPUExecutionProvider']
else:
execution_providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
else:
@@ -89,7 +97,7 @@ def setup_logger(verbose=True):
logging.getLogger("transformers").setLevel(logging.WARNING)


- def prepare_environment(cache_dir, output_dir, use_gpu, use_dml=False):
+ def prepare_environment(cache_dir, output_dir, use_gpu, provider=None):
if cache_dir and not os.path.exists(cache_dir):
os.makedirs(cache_dir)

@@ -98,7 +106,7 @@ def prepare_environment(cache_dir, output_dir, use_gpu, use_dml=False):

import onnxruntime
if use_gpu:
- if use_dml:
+ if provider == 'dml':
assert 'DmlExecutionProvider' in onnxruntime.get_available_providers(
), "Please install onnxruntime-directml package to test GPU inference."

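For context, create_onnxruntime_session now maps the provider string to an execution-provider list before falling back to CUDA. A minimal sketch of calling the updated helper, assuming it is imported from the transformers tools directory and that "model.onnx" and a ROCm-capable onnxruntime build are available:

from benchmark_helper import create_onnxruntime_session

session = create_onnxruntime_session("model.onnx",        # placeholder model path
                                     use_gpu=True,
                                     provider="migraphx",  # tries MIGraphX, then ROCm, then CPU
                                     enable_all_optimization=True,
                                     num_threads=-1)
# The helper returns None if session creation raised, so guard before use.
if session is not None:
    print(session.get_providers())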
48 changes: 42 additions & 6 deletions onnxruntime/python/tools/transformers/bert_perf_test.py
@@ -36,6 +36,7 @@ class TestSetting:
test_cases: int
test_times: int
use_gpu: bool
+ provider: str
intra_op_num_threads: int
seed: int
verbose: bool
@@ -50,7 +51,7 @@ class ModelSetting:
opt_level: int


- def create_session(model_path, use_gpu, intra_op_num_threads, graph_optimization_level=None):
+ def create_session(model_path, use_gpu, provider, intra_op_num_threads, graph_optimization_level=None):
import onnxruntime

if use_gpu and ('CUDAExecutionProvider' not in onnxruntime.get_available_providers()):
@@ -61,8 +62,21 @@ def create_session(model_path, use_gpu, intra_op_num_threads, graph_optimization
if intra_op_num_threads is None and graph_optimization_level is None:
session = onnxruntime.InferenceSession(model_path)
else:
- execution_providers = ['CPUExecutionProvider'
-                        ] if not use_gpu else ['CUDAExecutionProvider', 'CPUExecutionProvider']
+ if use_gpu:
+     if provider == 'dml':
+         execution_providers = ['DmlExecutionProvider', 'CPUExecutionProvider']
+     elif provider == 'rocm':
+         execution_providers = ['ROCMExecutionProvider', 'CPUExecutionProvider']
+     elif provider == 'migraphx':
+         execution_providers = ['MIGraphXExecutionProvider', 'ROCMExecutionProvider', 'CPUExecutionProvider']
+     elif provider == 'cuda':
+         execution_providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
+     elif provider == 'tensorrt':
+         execution_providers = ['TensorrtExecutionProvider', 'CUDAExecutionProvider', 'CPUExecutionProvider']
+     else:
+         execution_providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
+ else:
+     execution_providers = ['CPUExecutionProvider']

sess_options = onnxruntime.SessionOptions()
sess_options.execution_mode = onnxruntime.ExecutionMode.ORT_SEQUENTIAL
@@ -86,7 +100,23 @@ def create_session(model_path, use_gpu, intra_op_num_threads, graph_optimization
session = onnxruntime.InferenceSession(model_path, sess_options, providers=execution_providers)

if use_gpu:
-     assert 'CUDAExecutionProvider' in session.get_providers()
+     if provider == 'dml':
+         assert 'DmlExecutionProvider' in session.get_providers()
+     elif provider == 'rocm':
+         assert 'ROCMExecutionProvider' in session.get_providers()
+     elif provider == 'migraphx':
+         assert 'MIGraphXExecutionProvider' in session.get_providers()
+         assert 'ROCMExecutionProvider' in session.get_providers()
+     elif provider == 'cuda':
+         assert 'CUDAExecutionProvider' in session.get_providers()
+     elif provider == 'tensorrt':
+         assert 'TensorrtExecutionProvider' in session.get_providers()
+         assert 'CUDAExecutionProvider' in session.get_providers()
+     else:
+         assert 'CUDAExecutionProvider' in session.get_providers()
+ else:
+     assert 'CPUExecutionProvider' in session.get_providers()

return session


@@ -117,7 +147,7 @@ def to_string(model_path, session, test_setting):


def run_one_test(model_setting, test_setting, perf_results, all_inputs, intra_op_num_threads):
- session = create_session(model_setting.model_path, test_setting.use_gpu, intra_op_num_threads,
+ session = create_session(model_setting.model_path, test_setting.use_gpu, test_setting.provider, intra_op_num_threads,
model_setting.opt_level)
output_names = [output.name for output in session.get_outputs()]

@@ -239,6 +269,12 @@ def parse_arguments():
parser.add_argument('--use_gpu', required=False, action='store_true', help="use GPU")
parser.set_defaults(use_gpu=False)

parser.add_argument("--provider",
required=False,
type=str,
default=None,
help="Execution provider to use")

parser.add_argument('-n',
'--intra_op_num_threads',
required=False,
@@ -276,7 +312,7 @@ def main():

for batch_size in batch_size_set:
test_setting = TestSetting(batch_size, args.sequence_length, args.samples, args.test_times, args.use_gpu,
- args.intra_op_num_threads, args.seed, args.verbose)
+ args.provider, args.intra_op_num_threads, args.seed, args.verbose)

print("test setting", test_setting)
run_performance(model_setting, test_setting, perf_results)
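A short sketch of the extended create_session; the model path and thread count are placeholders, and the asserts above mean the named provider (plus CUDA, in the TensorRT case) must actually be present in the installed onnxruntime build:

from bert_perf_test import create_session

session = create_session("bert.onnx",             # placeholder model path
                         use_gpu=True,
                         provider="tensorrt",     # routed to TensorRT with CUDA and CPU fallbacks
                         intra_op_num_threads=8,
                         graph_optimization_level=None)
print(session.get_providers())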
3 changes: 2 additions & 1 deletion onnxruntime/python/tools/transformers/huggingface_models.py
@@ -6,7 +6,8 @@

# Maps model class name to a tuple of model class
MODEL_CLASSES = [
- 'AutoModel', 'AutoModelWithLMHead', 'AutoModelForSequenceClassification', 'AutoModelForQuestionAnswering'
+ 'AutoModel', 'AutoModelWithLMHead', 'AutoModelForSequenceClassification', 'AutoModelForQuestionAnswering',
+ 'AutoModelForCausalLM',
]

# List of pretrained models: https://huggingface.co/transformers/pretrained_models.html
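Adding AutoModelForCausalLM to MODEL_CLASSES lets decoder-only checkpoints be benchmarked with a causal-LM head. Roughly what the exporter's class lookup resolves to for that choice; the "gpt2" checkpoint and cache directory are illustrative, not taken from this PR:

from transformers import AutoConfig, AutoModelForCausalLM

config = AutoConfig.from_pretrained("gpt2", cache_dir="./cache_models")
model = AutoModelForCausalLM.from_pretrained("gpt2", config=config, cache_dir="./cache_models")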
1 change: 1 addition & 0 deletions onnxruntime/python/tools/transformers/onnx_exporter.py
@@ -255,6 +255,7 @@ def load_pretrained_model(model_name, config, cache_dir, custom_model_class, is_
model_class_name = 'TF' + model_class_name

transformers_module = __import__("transformers", fromlist=[model_class_name])
logger.info(f"Model class name: {model_class_name}")
model_class = getattr(transformers_module, model_class_name)

return model_class.from_pretrained(model_name, config=config, cache_dir=cache_dir)
15 changes: 9 additions & 6 deletions onnxruntime/python/tools/transformers/profiler.py
@@ -86,8 +86,11 @@ def parse_arguments(argv=None):
parser.add_argument('-g', '--use_gpu', required=False, action='store_true', help="use GPU")
parser.set_defaults(use_gpu=False)

- parser.add_argument('-d', '--use_dml', required=False, action='store_true', help="use DML")
- parser.set_defaults(use_dml=False)
+ parser.add_argument('--provider',
+                     required=False,
+                     type=str,
+                     default='cuda',
+                     help="Execution provider to use")

parser.add_argument(
'--basic_optimization',
@@ -108,15 +111,15 @@
return parser.parse_args(argv)


- def run_profile(onnx_model_path, use_gpu, basic_optimization, thread_num, all_inputs, use_dml):
+ def run_profile(onnx_model_path, use_gpu, provider, basic_optimization, thread_num, all_inputs):
from benchmark_helper import create_onnxruntime_session

session = create_onnxruntime_session(onnx_model_path,
use_gpu,
+ provider,
enable_all_optimization=not basic_optimization,
num_threads=thread_num,
- enable_profiling=True,
- use_dml=use_dml)
+ enable_profiling=True)

for inputs in all_inputs:
_ = session.run(None, inputs)
@@ -604,7 +607,7 @@ def run(args):
else: # default
all_inputs = create_dummy_inputs(onnx_model, args.batch_size, args.sequence_length, args.samples)

- profile_file = run_profile(args.model, args.use_gpu, args.basic_optimization, args.thread_num, all_inputs, args.use_dml)
+ profile_file = run_profile(args.model, args.use_gpu, args.provider, args.basic_optimization, args.thread_num, all_inputs)

return profile_file

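The profiler now takes the same --provider flag (default 'cuda') in place of the old --use_dml switch. A hedged invocation sketch; flag names other than -g/--use_gpu and --provider are assumed from the existing script, and the model path and shapes are placeholders:

import subprocess

# Profile a model on the DirectML provider instead of passing --use_dml.
subprocess.run(["python", "profiler.py",
                "--model", "bert.onnx",
                "-g",
                "--provider", "dml",
                "--batch_size", "1",
                "--sequence_length", "128"],
               check=True)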