[relay] use time_evaluator for measurement #4191

Merged (1 commit, Oct 24, 2019)
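
This change drops the relay.create_executor('vm') path in favor of compiling the model to a Relay VM executable with vm.compile, and it times the VM with the same time_evaluator mechanism the graph runtime path already uses. As a rough illustration, the snippet below is a minimal, self-contained sketch of that graph-runtime measurement pattern; it is not part of the PR, the mlp workload and shapes are borrowed from the tests in the diff, and it assumes a TVM build contemporary with this change (relay.build_config and graph_runtime as used in the diff).

```python
# Hypothetical standalone example; mirrors the graph runtime path in the diff.
import numpy as np
import tvm
from tvm import relay
from tvm.contrib import graph_runtime
from tvm.relay import testing

mod, params = testing.mlp.get_workload(1)
target, ctx = "llvm", tvm.cpu(0)
data = np.random.uniform(size=(1, 1, 28, 28)).astype("float32")

with relay.build_config(opt_level=3):
    graph, lib, params = relay.build(mod, target, params=params)

m = graph_runtime.create(graph, lib, ctx)
m.set_input("data", tvm.nd.array(data))
m.set_input(**params)

# `number` runs are folded into one averaged measurement; `repeat` such
# measurements come back in `results`, each reported in seconds.
ftimer = m.module.time_evaluator("run", ctx, number=2, repeat=20)
prof_res = np.array(ftimer().results) * 1000  # convert to milliseconds
print("Mean graph runtime inference time (std dev): %.2f ms (%.2f ms)" %
      (np.mean(prof_res), np.std(prof_res)))
```

The `results` list is what the benchmark converts to milliseconds before reporting the mean and standard deviation.
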
tests/python/relay/benchmarking/benchmark_vm.py: 65 changes (45 additions, 20 deletions)
@@ -21,16 +21,20 @@
 from tvm.contrib import graph_runtime
 from tvm import relay
 from tvm.relay import testing
+from tvm.relay import vm
+from tvm.relay import vmobj as _obj


 def benchmark_execution(mod,
                         params,
-                        measure=False,
+                        measure=True,
                         data_shape=(1, 3, 224, 224),
                         out_shape=(1, 1000),
-                        dtype='float32'):
-    def get_tvm_output(mod, data, params, target, ctx, dtype='float32'):
-        with relay.build_config(opt_level=1):
+                        dtype='float32',
+                        model="unknown"):
+    def get_graph_runtime_output(mod, data, params, target, ctx,
+                                 dtype='float32', number=2, repeat=20):
+        with relay.build_config(opt_level=3):
             graph, lib, params = relay.build(mod, target, params=params)

         m = graph_runtime.create(graph, lib, ctx)
@@ -41,60 +45,81 @@ def get_tvm_output(mod, data, params, target, ctx, dtype='float32'):
         out = m.get_output(0, tvm.nd.empty(out_shape, dtype))

         if measure:
-            print("Evaluate graph runtime inference time cost...")
+            print("Evaluate graph runtime inference cost of {} on "
+                  "{}".format(model, repr(ctx)))
             ftimer = m.module.time_evaluator("run", ctx, number=1, repeat=20)
             # Measure in millisecond.
             prof_res = np.array(ftimer().results) * 1000
-            print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
+            print("Mean graph runtime inference time (std dev): %.2f ms (%.2f ms)" %
                   (np.mean(prof_res), np.std(prof_res)))

         return out.asnumpy()

-    def get_tvm_vm_output(mod, data, params, target, ctx, dtype='float32'):
-        ex = relay.create_executor('vm', mod=mod, ctx=ctx)
-        result = ex.evaluate()(data, **params)
+    def get_vm_output(mod, data, params, target, ctx, dtype='float32',
+                      number=2, repeat=20):
+        with relay.build_config(opt_level=3):
+            exe = vm.compile(mod, target, params=params)
+            rly_vm = vm.VirtualMachine(exe)
+            rly_vm.init(ctx)
+            result = rly_vm.run(data)
+
+        if measure:
+            print("Evaluate vm inference cost of {} on {}".format(model,
+                                                                  repr(ctx)))
+            ftimer = rly_vm.mod.time_evaluator("invoke", ctx, number=number,
+                                               repeat=repeat)
+            # Measure in millisecond.
+            prof_res = np.array(ftimer("main", _obj.Tensor(data)).results) * 1000
+            print("Mean vm inference time (std dev): %.2f ms (%.2f ms)" %
+                  (np.mean(prof_res), np.std(prof_res)))
+
         return result.asnumpy().astype(dtype)

     # random input
     data = np.random.uniform(size=data_shape).astype(dtype)
     target = "llvm"
     ctx = tvm.cpu(0)

-    tvm_out = get_tvm_output(mod, tvm.nd.array(data.astype(dtype)), params,
-                             target, ctx, dtype)
-    vm_out = get_tvm_vm_output(mod, tvm.nd.array(data.astype(dtype)), params,
-                               target, ctx, dtype)
+    tvm_out = get_graph_runtime_output(mod, tvm.nd.array(data.astype(dtype)),
+                                       params, target, ctx, dtype)
+    vm_out = get_vm_output(mod, tvm.nd.array(data.astype(dtype)), params,
+                           target, ctx, dtype)
     tvm.testing.assert_allclose(vm_out, tvm_out, rtol=1e-5, atol=1e-5)


 def test_mlp():
     image_shape = (1, 1, 28, 28)
     mod, params = testing.mlp.get_workload(1)
-    benchmark_execution(mod, params, data_shape=image_shape, out_shape=(1, 10))
+    benchmark_execution(mod, params, data_shape=image_shape, out_shape=(1, 10),
+                        model="mlp")


 def test_vgg():
     for n in [11, 16]:
         mod, params = testing.vgg.get_workload(1, num_layers=n)
-        benchmark_execution(mod, params)
+        model = "vgg" + str(n)
+        benchmark_execution(mod, params, model=model)


 def test_resnet():
     for n in [18, 50]:
         mod, params = testing.resnet.get_workload(batch_size=1, num_layers=n)
-        benchmark_execution(mod, params, True)
+        model = "resnet" + str(n)
+        benchmark_execution(mod, params, model=model)


 def test_squeezenet():
     for version in ['1.0', '1.1']:
         mod, params = testing.squeezenet.get_workload(version=version)
-        benchmark_execution(mod, params)
+        model = "squeezenet" + version
+        benchmark_execution(mod, params, model=model)


 def test_inception_v3():
     image_shape = (3, 299, 299)
     mod, params = testing.inception_v3.get_workload(image_shape=image_shape)
-    benchmark_execution(mod, params, data_shape=(1, 3, 299, 299))
+    benchmark_execution(mod, params, data_shape=(1, 3, 299, 299),
+                        model="inception_v3")


 def test_dqn():
@@ -112,7 +137,7 @@ def test_dcgan():

 def test_mobilenet():
     mod, params = testing.mobilenet.get_workload(batch_size=1)
-    benchmark_execution(mod, params)
+    benchmark_execution(mod, params, model="mobilenet")

 # TODO: enable when the low building performance (several minutes) fixed.
 def test_mobilenet_nhwc():
@@ -124,7 +149,7 @@ def test_mobilenet_nhwc():

 def test_densenet():
     mod, params = testing.densenet.get_workload(batch_size=1)
-    benchmark_execution(mod, params)
+    benchmark_execution(mod, params, model="densenet")


 if __name__ == '__main__':
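
For reference, below is a minimal, self-contained sketch of the VM measurement path added above; it is not part of the PR. vm.compile, vm.VirtualMachine, vmobj.Tensor, and the "invoke"/"main" names are taken directly from the changed lines and reflect the Relay VM API at the time of this PR, so they may differ in later TVM releases.

```python
# Hypothetical standalone example; mirrors the new get_vm_output path.
import numpy as np
import tvm
from tvm import relay
from tvm.relay import testing, vm
from tvm.relay import vmobj as _obj

mod, params = testing.mlp.get_workload(1)
target, ctx = "llvm", tvm.cpu(0)
data = tvm.nd.array(
    np.random.uniform(size=(1, 1, 28, 28)).astype("float32"))

# Compile to a VM executable instead of going through relay.create_executor.
with relay.build_config(opt_level=3):
    exe = vm.compile(mod, target, params=params)

rly_vm = vm.VirtualMachine(exe)
rly_vm.init(ctx)
result = rly_vm.run(data)  # functional check of the compiled executable

# Time the VM's "invoke" packed function; "main" is the entry function and
# the input is wrapped as a VM tensor object.
ftimer = rly_vm.mod.time_evaluator("invoke", ctx, number=2, repeat=20)
prof_res = np.array(ftimer("main", _obj.Tensor(data)).results) * 1000
print("Mean vm inference time (std dev): %.2f ms (%.2f ms)" %
      (np.mean(prof_res), np.std(prof_res)))
```

Timing the "invoke" packed function with the entry name "main" mirrors how the graph runtime path times its "run" function, so both executors are now measured the same way.
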