From 62024d845dadb5883639cbe32f7561fa34a03f64 Mon Sep 17 00:00:00 2001 From: Hansong Zhang Date: Fri, 13 Sep 2024 15:36:33 -0700 Subject: [PATCH] Use ms for number report (#5362) Summary: ns * 1e-6 = ms Example output: (9.36ms inference mv2_xnnpack) ``` [{"actualValue":9.3611927,"benchmarkModel":{"backend":"","name":"mv2_xnnpack","quantization":""},"deviceInfo":{"arch":"SM-S926U1","availMem":0,"device":"samsung","os":"Android 14","totalMem":0},"metric":"avg_inference_latency(ms)","targetValue":0.0},{"actualValue":27.418698,"benchmarkModel":{"backend":"","name":"mv2_xnnpack","quantization":""},"deviceInfo":{"arch":"SM-S926U1","availMem":0,"device":"samsung","os":"Android 14","totalMem":0},"metric":"model_load_time(ms)","targetValue":0.0},{"actualValue":0.0,"benchmarkModel":{"backend":"","name":"mv2_xnnpack","quantization":""},"deviceInfo":{"arch":"SM-S926U1","availMem":0,"device":"samsung","os":"Android 14","totalMem":0},"metric":"load_status","targetValue":0.0}] ``` and for LLM (12.34 tps mocked number) ``` [{"actualValue":0.0,"benchmarkModel":{"backend":"","name":"et_exported_llama","quantization":""},"deviceInfo":{"arch":"SM-S926U1","availMem":0,"device":"samsung","os":"Android 14","totalMem":0},"metric":"load_status","targetValue":0.0},{"actualValue":3375.450312,"benchmarkModel":{"backend":"","name":"et_exported_llama","quantization":""},"deviceInfo":{"arch":"SM-S926U1","availMem":0,"device":"samsung","os":"Android 14","totalMem":0},"metric":"model_load_time(ms)","targetValue":0.0},{"actualValue":8220.467966,"benchmarkModel":{"backend":"","name":"et_exported_llama","quantization":""},"deviceInfo":{"arch":"SM-S926U1","availMem":0,"device":"samsung","os":"Android 14","totalMem":0},"metric":"generate_time(ms)","targetValue":0.0},{"actualValue":12.3456,"benchmarkModel":{"backend":"","name":"et_exported_llama","quantization":""},"deviceInfo":{"arch":"SM-S926U1","availMem":0,"device":"samsung","os":"Android 
14","totalMem":0},"metric":"token_per_sec","targetValue":0.0}] ``` Pull Request resolved: https://github.com/pytorch/executorch/pull/5362 Reviewed By: huydhn Differential Revision: D62665456 Pulled By: kirklandsign fbshipit-source-id: 3d2444d90aef82bd5f331838a89319be9e9797e5 --- .../example/executorchllamademo/LlmBenchmarkRunner.java | 8 ++++---- .../java/org/pytorch/minibench/BenchmarkActivity.java | 8 ++++---- .../java/org/pytorch/minibench/LlmBenchmarkActivity.java | 8 ++++---- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/LlmBenchmarkRunner.java b/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/LlmBenchmarkRunner.java index efb9ab21ea..7236fe317b 100644 --- a/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/LlmBenchmarkRunner.java +++ b/examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/LlmBenchmarkRunner.java @@ -105,15 +105,15 @@ public void onGenerationStopped() { results.add( new BenchmarkMetric( benchmarkModel, - "model_load_time(ns)", - mStatsDump.loadEnd - mStatsDump.loadStart, + "model_load_time(ms)", + (mStatsDump.loadEnd - mStatsDump.loadStart) * 1e-6, 0.0f)); // LLM generate time results.add( new BenchmarkMetric( benchmarkModel, - "generate_time(ns)", - mStatsDump.generateEnd - mStatsDump.generateStart, + "generate_time(ms)", + (mStatsDump.generateEnd - mStatsDump.generateStart) * 1e-6, 0.0f)); // Token per second results.add( diff --git a/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java b/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java index 4360bdcef8..2397bcfb85 100644 --- a/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java +++ 
b/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/BenchmarkActivity.java @@ -56,7 +56,7 @@ protected void onCreate(Bundle savedInstanceState) { for (int i = 0; i < numIter; i++) { long start = System.nanoTime(); module.forward(); - long forwardMs = System.nanoTime() - start; + double forwardMs = (System.nanoTime() - start) * 1e-6; stats.latency.add(forwardMs); } @@ -68,13 +68,13 @@ protected void onCreate(Bundle savedInstanceState) { results.add( new BenchmarkMetric( benchmarkModel, - "avg_inference_latency(ns)", + "avg_inference_latency(ms)", stats.latency.stream().mapToDouble(l -> l).average().orElse(0.0f), 0.0f)); // Model load time results.add( new BenchmarkMetric( - benchmarkModel, "model_load_time(ns)", stats.loadEnd - stats.loadStart, 0.0f)); + benchmarkModel, "model_load_time(ms)", (stats.loadEnd - stats.loadStart) * 1e-6, 0.0f)); // Load status results.add(new BenchmarkMetric(benchmarkModel, "load_status", stats.errorCode, 0)); @@ -90,7 +90,7 @@ protected void onCreate(Bundle savedInstanceState) { class Stats { long loadStart; long loadEnd; - List<Long> latency = new ArrayList<>(); + List<Double> latency = new ArrayList<>(); int errorCode = 0; @Override diff --git a/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/LlmBenchmarkActivity.java b/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/LlmBenchmarkActivity.java index 04702562ba..3bc38aad40 100644 --- a/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/LlmBenchmarkActivity.java +++ b/extension/android/benchmark/app/src/main/java/org/pytorch/minibench/LlmBenchmarkActivity.java @@ -97,15 +97,15 @@ public void onGenerationStopped() { results.add( new BenchmarkMetric( benchmarkModel, - "model_load_time(ns)", - mStatsInfo.loadEnd - mStatsInfo.loadStart, + "model_load_time(ms)", + (mStatsInfo.loadEnd - mStatsInfo.loadStart) * 1e-6, 0.0f)); // LLM generate time results.add( new BenchmarkMetric( benchmarkModel, - "generate_time(ns)", - 
mStatsInfo.generateEnd - mStatsInfo.generateStart, + "generate_time(ms)", + (mStatsInfo.generateEnd - mStatsInfo.generateStart) * 1e-6, 0.0f)); // Token per second results.add(