Support specifying an execution provider in benchmark script #10453

Merged · 6 commits · Feb 3, 2022

This PR threads a new provider argument through the transformer benchmark and profiling scripts. A generic --provider command-line option (accepting dml, rocm, migraphx, or tensorrt) replaces the DirectML-specific use_dml flag; when no provider is given, the GPU path keeps its previous CUDA default.
onnxruntime/python/tools/transformers/benchmark.py (12 additions, 4 deletions)

@@ -68,13 +68,14 @@
 from transformers import (AutoConfig, AutoTokenizer, AutoModel, GPT2Model, LxmertConfig)


-def run_onnxruntime(use_gpu, model_names, model_class, precision, num_threads, batch_sizes, sequence_lengths,
+def run_onnxruntime(use_gpu, provider, model_names, model_class, precision, num_threads, batch_sizes, sequence_lengths,
                     repeat_times, input_counts, optimize_onnx, validate_onnx, cache_dir, onnx_dir, verbose, overwrite,
                     disable_ort_io_binding, use_raw_attention_mask, model_fusion_statistics, model_source):
     import onnxruntime

     results = []
-    if use_gpu and ('CUDAExecutionProvider' not in onnxruntime.get_available_providers()):
+    if (use_gpu and ('CUDAExecutionProvider' not in onnxruntime.get_available_providers()) and
+            ('ROCMExecutionProvider' not in onnxruntime.get_available_providers())):
         logger.error(
             "Please install onnxruntime-gpu package instead of onnxruntime, and use a machine with GPU for testing gpu performance."
         )

@@ -105,6 +106,7 @@ def run_onnxruntime(...)
         ort_session = create_onnxruntime_session(onnx_model_file,
                                                  use_gpu,
+                                                 provider,
                                                  enable_all_optimization=True,
                                                  num_threads=num_threads,
                                                  verbose=verbose)

@@ -425,7 +427,13 @@ def parse_arguments():
                         default=os.path.join('.', 'onnx_models'),
                         help="Directory to store onnx models")

-    parser.add_argument("-g", "--use_gpu", required=False, action="store_true", help="Run on cuda device")
+    parser.add_argument("-g", "--use_gpu", required=False, action="store_true", help="Run on gpu device")
+
+    parser.add_argument("--provider",
+                        required=False,
+                        type=str,
+                        default=None,
+                        help="Execution provider to use")

     parser.add_argument(
         "-p",

@@ -545,7 +553,7 @@ def main():
     if enable_onnxruntime:
         try:
             use_raw_attention_mask = True
-            results += run_onnxruntime(args.use_gpu, args.models, args.model_class, args.precision, num_threads,
+            results += run_onnxruntime(args.use_gpu, args.provider, args.models, args.model_class, args.precision, num_threads,
                                        args.batch_sizes, args.sequence_lengths, args.test_times, args.input_counts,
                                        args.optimize_onnx, args.validate_onnx, args.cache_dir, args.onnx_dir,
                                        args.verbose, args.overwrite, args.disable_ort_io_binding,
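
With the new flag, a GPU run can target a specific execution provider from the command line. A hypothetical invocation (the model, batch size, and sequence length flags are illustrative, not part of this diff):

    python benchmark.py -g --provider rocm -m bert-base-cased -b 1 -s 128

When --provider is omitted, the GPU path falls back to the CUDA branch of create_onnxruntime_session.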
onnxruntime/python/tools/transformers/benchmark_helper.py (9 additions, 3 deletions)

@@ -39,11 +39,11 @@ def __str__(self):

 def create_onnxruntime_session(onnx_model_path,
                                use_gpu,
+                               provider=None,
                                enable_all_optimization=True,
                                num_threads=-1,
                                enable_profiling=False,
-                               verbose=False,
-                               use_dml=False):
+                               verbose=False):
     session = None
     try:
         from onnxruntime import SessionOptions, InferenceSession, GraphOptimizationLevel, __version__ as onnxruntime_version

@@ -68,8 +68,14 @@ def create_onnxruntime_session(...)

     logger.debug(f"Create session for onnx model: {onnx_model_path}")
     if use_gpu:
-        if use_dml:
+        if provider == 'dml':
             execution_providers = ['DmlExecutionProvider', 'CPUExecutionProvider']
+        elif provider == 'rocm':
+            execution_providers = ['ROCMExecutionProvider', 'CPUExecutionProvider']
+        elif provider == 'migraphx':
+            execution_providers = ['MIGraphXExecutionProvider', 'ROCMExecutionProvider', 'CPUExecutionProvider']
+        elif provider == 'tensorrt':
+            execution_providers = ['TensorrtExecutionProvider', 'CUDAExecutionProvider', 'CPUExecutionProvider']
        else:
             execution_providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
     else:
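
The mapping above is easiest to see in isolation. A minimal sketch of calling the updated helper, assuming a local model.onnx and an onnxruntime build that includes the requested provider:

    from benchmark_helper import create_onnxruntime_session

    # 'model.onnx' is a placeholder path; provider is one of
    # 'dml', 'rocm', 'migraphx', 'tensorrt', or None (CUDA default).
    session = create_onnxruntime_session('model.onnx', use_gpu=True, provider='migraphx')
    if session is not None:
        # ORT falls back along the provider list, so check what was actually enabled.
        print(session.get_providers())

Note that every list keeps CPUExecutionProvider last, so a session can still be created when the preferred provider is unavailable.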
onnxruntime/python/tools/transformers/bert_perf_test.py (38 additions, 6 deletions)

@@ -36,6 +36,7 @@ class TestSetting:
     test_cases: int
     test_times: int
     use_gpu: bool
+    provider: str
     intra_op_num_threads: int
     seed: int
     verbose: bool

@@ -50,7 +51,7 @@ class ModelSetting:
     opt_level: int


-def create_session(model_path, use_gpu, intra_op_num_threads, graph_optimization_level=None):
+def create_session(model_path, use_gpu, provider, intra_op_num_threads, graph_optimization_level=None):
     import onnxruntime

     if use_gpu and ('CUDAExecutionProvider' not in onnxruntime.get_available_providers()):

@@ -61,8 +62,19 @@ def create_session(...)
     if intra_op_num_threads is None and graph_optimization_level is None:
         session = onnxruntime.InferenceSession(model_path)
     else:
-        execution_providers = ['CPUExecutionProvider'
-                              ] if not use_gpu else ['CUDAExecutionProvider', 'CPUExecutionProvider']
+        if use_gpu:
+            if provider == 'dml':
+                execution_providers = ['DmlExecutionProvider', 'CPUExecutionProvider']
+            elif provider == 'rocm':
+                execution_providers = ['ROCMExecutionProvider', 'CPUExecutionProvider']
+            elif provider == 'migraphx':
+                execution_providers = ['MIGraphXExecutionProvider', 'ROCMExecutionProvider', 'CPUExecutionProvider']
+            elif provider == 'tensorrt':
+                execution_providers = ['TensorrtExecutionProvider', 'CUDAExecutionProvider', 'CPUExecutionProvider']
+            else:
+                execution_providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
+        else:
+            execution_providers = ['CPUExecutionProvider']

         sess_options = onnxruntime.SessionOptions()
         sess_options.execution_mode = onnxruntime.ExecutionMode.ORT_SEQUENTIAL

@@ -86,7 +98,21 @@ def create_session(...)
         session = onnxruntime.InferenceSession(model_path, sess_options, providers=execution_providers)

     if use_gpu:
-        assert 'CUDAExecutionProvider' in session.get_providers()
+        if provider == 'dml':
+            assert 'DmlExecutionProvider' in session.get_providers()
+        elif provider == 'rocm':
+            assert 'ROCMExecutionProvider' in session.get_providers()
+        elif provider == 'migraphx':
+            assert 'MIGraphXExecutionProvider' in session.get_providers()
+            assert 'ROCMExecutionProvider' in session.get_providers()
+        elif provider == 'tensorrt':
+            assert 'TensorrtExecutionProvider' in session.get_providers()
+            assert 'CUDAExecutionProvider' in session.get_providers()
+        else:
+            assert 'CUDAExecutionProvider' in session.get_providers()
+    else:
+        assert 'CPUExecutionProvider' in session.get_providers()

     return session


@@ -117,7 +143,7 @@ def to_string(model_path, session, test_setting):


 def run_one_test(model_setting, test_setting, perf_results, all_inputs, intra_op_num_threads):
-    session = create_session(model_setting.model_path, test_setting.use_gpu, intra_op_num_threads,
+    session = create_session(model_setting.model_path, test_setting.use_gpu, test_setting.provider, intra_op_num_threads,
                              model_setting.opt_level)
     output_names = [output.name for output in session.get_outputs()]

@@ -239,6 +265,12 @@ def parse_arguments():
     parser.add_argument('--use_gpu', required=False, action='store_true', help="use GPU")
     parser.set_defaults(use_gpu=False)

+    parser.add_argument("--provider",
+                        required=False,
+                        type=str,
+                        default=None,
+                        help="Execution provider to use")

     parser.add_argument('-n',
                         '--intra_op_num_threads',
                         required=False,

@@ -276,7 +308,7 @@ def main():

     for batch_size in batch_size_set:
         test_setting = TestSetting(batch_size, args.sequence_length, args.samples, args.test_times, args.use_gpu,
-                                   args.intra_op_num_threads, args.seed, args.verbose)
+                                   args.provider, args.intra_op_num_threads, args.seed, args.verbose)

         print("test setting", test_setting)
         run_performance(model_setting, test_setting, perf_results)
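
A hypothetical run of the perf test with the new setting (the --model, --batch_size, and --sequence_length flag names are assumed from the settings fields above; only --use_gpu, --provider, and --intra_op_num_threads appear in this diff):

    python bert_perf_test.py --model bert.onnx --batch_size 1 --sequence_length 128 --use_gpu --provider tensorrt

Because create_session now asserts on session.get_providers(), a run fails fast when the requested provider was not actually enabled, instead of silently benchmarking on CPU.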
onnxruntime/python/tools/transformers/profiler.py (9 additions, 6 deletions)

@@ -86,8 +86,11 @@ def parse_arguments(argv=None):
     parser.add_argument('-g', '--use_gpu', required=False, action='store_true', help="use GPU")
     parser.set_defaults(use_gpu=False)

-    parser.add_argument('-d', '--use_dml', required=False, action='store_true', help="use DML")
-    parser.set_defaults(use_dml=False)
+    parser.add_argument('--provider',
+                        required=False,
+                        type=str,
+                        default='cuda',
+                        help="Execution provider to use")

     parser.add_argument(
         '--basic_optimization',

@@ -108,15 +111,15 @@ def parse_arguments(argv=None):
     return parser.parse_args(argv)


-def run_profile(onnx_model_path, use_gpu, basic_optimization, thread_num, all_inputs, use_dml):
+def run_profile(onnx_model_path, use_gpu, provider, basic_optimization, thread_num, all_inputs):
     from benchmark_helper import create_onnxruntime_session

     session = create_onnxruntime_session(onnx_model_path,
                                          use_gpu,
+                                         provider,
                                          enable_all_optimization=not basic_optimization,
                                          num_threads=thread_num,
-                                         enable_profiling=True,
-                                         use_dml=use_dml)
+                                         enable_profiling=True)

     for inputs in all_inputs:
         _ = session.run(None, inputs)

@@ -604,7 +607,7 @@ def run(args):
     else:  # default
         all_inputs = create_dummy_inputs(onnx_model, args.batch_size, args.sequence_length, args.samples)

-    profile_file = run_profile(args.model, args.use_gpu, args.basic_optimization, args.thread_num, all_inputs, args.use_dml)
+    profile_file = run_profile(args.model, args.use_gpu, args.provider, args.basic_optimization, args.thread_num, all_inputs)

     return profile_file
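
A hypothetical profiling run (flag names other than --use_gpu and --provider are inferred from the args fields referenced in run(args)):

    python profiler.py --model bert.onnx --batch_size 1 --sequence_length 128 --samples 10 --use_gpu --provider rocm

Unlike the other scripts, --provider here defaults to 'cuda' rather than None; since create_onnxruntime_session only compares provider against 'dml', 'rocm', 'migraphx', and 'tensorrt', the 'cuda' default lands in the CUDA/CPU else branch.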