Support EP plugins
adrianlizarraga committed Nov 4, 2024
1 parent 0a4a4dd commit 04ef34b
Showing 9 changed files with 235 additions and 60 deletions.
2 changes: 2 additions & 0 deletions c_cxx/accuracy_tool/CMakeLists.txt
@@ -66,6 +66,8 @@ add_executable(accuracy_test src/main.cc
src/cmd_args.cc
src/ep_cmd_args/qnn_cmd_args.h
src/ep_cmd_args/qnn_cmd_args.cc
+src/ep_cmd_args/plugin_cmd_args.h
+src/ep_cmd_args/plugin_cmd_args.cc
src/basic_utils.h
src/basic_utils.cc
src/model_io_utils.h
79 changes: 55 additions & 24 deletions c_cxx/accuracy_tool/README.md
@@ -112,8 +112,7 @@ models/
|
+--> resnet/
| |
-| +--> model.onnx
-| +--> model.qdq.onnx (quantized model only required for certains EPs like QNN)
+| +--> model.onnx (see options --ground_truth_model_name and --ep_model_name)
| |
| +--> test_data_set_0/
| | |
@@ -127,17 +126,16 @@ models/
|
+--> mobilenet/
|
-+--> model.onnx
-+--> model.qdq.onnx
++--> model.onnx (Note: same model name)
|
+--> test_data_set_0/
+--> test_data_set_1/
```

-- All ONNX models must be named either `model.onnx` or `model.qdq.onnx`.
-- The `model.qdq.onnx` file is only necessary for execution providers that run quantized models (e.g., QNN).
+- By default, the tool expects all ONNX models to be named `model.onnx`.
+- Use the option `--ground_truth_model_name` to set the name of the model used to get the expected (ground-truth) output with the CPU EP. Defaults to `model.onnx`.
+- Use the option `--ep_model_name` to set the name of the model loaded by the EP under test. Defaults to `model.onnx`.
- If the expected output files are not provided, the expected outputs will be obtained by running `model.onnx` on the CPU execution provider.
- Both `model.qdq.onnx` and `model.onnx` must have the same input and output signature (i.e., same names, shapes, types, and ordering).
- The dataset directories must be named `test_data_set_<index>/`, where `<index>` ranges from 0 to one less than the number of dataset directories.
- The raw input files must be named `input_<index>.raw`, where `<index>` corresponds to the input's index in the ONNX model.
- The raw output files are not required if `model.onnx` is provided.
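The `input_<index>.raw` and `output_<index>.raw` files hold a tensor's flattened bytes (for a float32 tensor, its IEEE-754 values in native byte order); shape and type are implied by the model's signature. A minimal sketch of producing and reading such a file (the helper names are illustrative, not part of the tool):

```cpp
#include <cstddef>
#include <cstdio>
#include <fstream>
#include <string>
#include <vector>

// Write a float32 tensor's flattened data to a raw file such as input_0.raw.
// The file contains only the tensor's bytes; returns true on success.
bool WriteRawTensor(const std::string& path, const std::vector<float>& data) {
  std::ofstream out(path, std::ios::binary);
  if (!out) return false;
  out.write(reinterpret_cast<const char*>(data.data()),
            static_cast<std::streamsize>(data.size() * sizeof(float)));
  return out.good();
}

// Read a raw file back into a float32 vector (empty on failure).
std::vector<float> ReadRawTensor(const std::string& path) {
  std::ifstream in(path, std::ios::binary | std::ios::ate);
  std::vector<float> data;
  if (!in) return data;
  const std::size_t num_bytes = static_cast<std::size_t>(in.tellg());
  data.resize(num_bytes / sizeof(float));
  in.seekg(0);
  in.read(reinterpret_cast<char*>(data.data()),
          static_cast<std::streamsize>(num_bytes));
  return data;
}
```

The element count of each file must match the corresponding model input or output.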
@@ -158,40 +156,52 @@ Usage: accuracy_test.exe [OPTIONS...] test_models_path
Defaults to false.
-s/--save_expected_outputs Save outputs from baseline model on CPU EP to disk as
output_<index>.raw files. Defaults to false.
--e/--execution_provider ep [EP_ARGS] The execution provider to test (e.g., qnn or cpu)
+-e/--execution_provider ep [EP_ARGS] The execution provider to test (e.g., qnn, cpu, or plugin)
Defaults to the CPU execution provider.
-c/--session_configs "<key1>|<val1> <key2>|<val2>" Session configuration options for EP under test.
Refer to onnxruntime_session_options_config_keys.h
-o/--output_file path The output file into which to save accuracy results
-a/--expected_accuracy_file path The file containing expected accuracy results
+--ep_model_name onnx_model_name The name of the ONNX model to test for EP.
+Defaults to 'model.onnx'.
+--ground_truth_model_name onnx_model_name The name of the ONNX model used to get
+expected output with CPU EP.
+Not used if expected outputs are
+loaded from file. Defaults to 'model.onnx'.
--model model_name Model to test. Option can be specified multiple times.
By default, all found models are tested.

-[EP_ARGS]: Specify EP-specific runtime options as key value pairs.
-Example: -e <provider_name> "<key1>|<val1> <key2>|<val2>"
-[QNN only] [backend_path]: QNN backend path (e.g., 'C:\Path\QnnHtp.dll')
-[QNN only] [profiling_level]: QNN profiling level, options: 'basic', 'detailed',
+[EP_ARGS]: Specify EP-specific options.
+CPU EP: -e cpu
+QNN EP: -e qnn "<key1>|<val1> <key2>|<val2>"
+Valid QNN key/val pairs:
+[backend_path]: QNN backend path (e.g., 'C:\Path\QnnHtp.dll')
+[profiling_level]: QNN profiling level, options: 'basic', 'detailed',
default 'off'.
-[QNN only] [rpc_control_latency]: QNN rpc control latency. default to 10.
-[QNN only] [vtcm_mb]: QNN VTCM size in MB. default to 0 (not set).
-[QNN only] [htp_performance_mode]: QNN performance mode, options: 'burst', 'balanced',
+[rpc_control_latency]: QNN rpc control latency. default to 10.
+[vtcm_mb]: QNN VTCM size in MB. default to 0 (not set).
+[htp_performance_mode]: QNN performance mode, options: 'burst', 'balanced',
'default', 'high_performance', 'high_power_saver',
'low_balanced', 'low_power_saver', 'power_saver',
'sustained_high_performance'. Defaults to 'default'.
-[QNN only] [qnn_context_priority]: QNN context priority, options: 'low', 'normal',
+[qnn_context_priority]: QNN context priority, options: 'low', 'normal',
'normal_high', 'high'. Defaults to 'normal'.
-[QNN only] [qnn_saver_path]: QNN Saver backend path. e.g 'C:\Path\QnnSaver.dll'.
-[QNN only] [htp_graph_finalization_optimization_mode]: QNN graph finalization
+[qnn_saver_path]: QNN Saver backend path. e.g., 'C:\Path\QnnSaver.dll'.
+[htp_graph_finalization_optimization_mode]: QNN graph finalization
optimization mode, options: '0', '1', '2', '3'. Default is '0'.
+Plugin EP: -e plugin <ep_name> <plugin_library_path> "<key1>|<val1> <key2>|<val2>"
+All key/value pairs are considered session options.
```
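Both `-c/--session_configs` and the `[EP_ARGS]` strings use the same space-separated `key|value` format. A rough sketch of how such a string can be split (illustrative only; the tool's own parser lives in `src/cmd_args.cc`):

```cpp
#include <sstream>
#include <string>
#include <unordered_map>

// Split a string like "key1|val1 key2|val2" into a key -> value map.
// Returns an empty map if any whitespace-separated token lacks a '|'.
std::unordered_map<std::string, std::string> ParseKeyValuePairs(const std::string& s) {
  std::unordered_map<std::string, std::string> result;
  std::istringstream stream(s);
  std::string token;
  while (stream >> token) {
    const std::size_t sep = token.find('|');
    if (sep == std::string::npos) return {};  // malformed pair
    result[token.substr(0, sep)] = token.substr(sep + 1);
  }
  return result;
}
```

For example, `"backend_path|QnnHtp.dll vtcm_mb|8"` yields two entries; values containing spaces are not representable in this scheme.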
## Usage examples
### Measure accuracy of QDQ model on CPU EP
- Assumes each model directory has both a `model.onnx` and a `model.qdq.onnx`.
- The expected outputs are generated by running the float32 `model.onnx` on CPU EP.
- The actual outputs are generated by running the QDQ `model.qdq.onnx` on CPU EP.
- Accuracy results (SNR) are dumped to stdout.
```shell
-$ .\accuracy_test -e cpu models
+$ .\accuracy_test -e cpu --ep_model_name model.qdq.onnx models

[INFO]: Accuracy Results (CSV format):

@@ -203,15 +213,15 @@ model_a/test_data_set_2,16.712691432087745
Use the `-o` command-line option to write the accuracy results to file.
```shell
-$ .\accuracy_test -o results.csv -e cpu models
+$ .\accuracy_test -o results.csv -e cpu --ep_model_name model.qdq.onnx models

[INFO]: Saved accuracy results to results.csv
```
### Dump (and load) the expected outputs to disk
Use the `-s` command-line option to dump the expected outputs to disk (e.g., output_0.raw). The expected outputs are obtained by running `model.onnx` on the CPU EP regardless of the EP passed to the `-e` command-line option.
```shell
-$ .\accuracy_test -s -e cpu models
+$ .\accuracy_test -s -e cpu --ep_model_name model.qdq.onnx models

[INFO]: Accuracy Results (CSV format):

@@ -221,7 +231,7 @@ model_a/test_data_set_0,17.640392603599537
Use the `-l` command-line option to load the expected outputs directly from `output_<index>.raw` files.
```shell
-$ .\accuracy_test -l -e cpu models
+$ .\accuracy_test -l -e cpu --ep_model_name model.qdq.onnx models

[INFO]: Accuracy Results (CSV format):

@@ -230,13 +240,15 @@ model_a/test_data_set_0,17.640392603599537
```
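The accuracy numbers in the CSV output are signal-to-noise ratios in dB, computed per test dataset from the expected and actual output tensors. A sketch of one common SNR formulation (assumed here; the tool's exact computation may differ):

```cpp
#include <cmath>
#include <limits>
#include <vector>

// SNR in dB: 10 * log10(signal_power / noise_power), where noise is the
// element-wise difference between the expected and actual outputs.
// Higher is better; identical outputs yield +infinity.
double ComputeSnrDb(const std::vector<float>& expected, const std::vector<float>& actual) {
  double signal_power = 0.0;
  double noise_power = 0.0;
  for (std::size_t i = 0; i < expected.size(); ++i) {
    const double diff = static_cast<double>(expected[i]) - static_cast<double>(actual[i]);
    signal_power += static_cast<double>(expected[i]) * static_cast<double>(expected[i]);
    noise_power += diff * diff;
  }
  if (noise_power == 0.0) return std::numeric_limits<double>::infinity();
  return 10.0 * std::log10(signal_power / noise_power);
}
```

Under this definition, an SNR around 17-21 dB (as in the sample output above) means the error energy is two orders of magnitude below the signal energy.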
### Measure accuracy of QDQ model on QNN EP and detect regressions
- Assumes each model directory has both a `model.onnx` and a `model.qdq.onnx`.
- The expected outputs are generated by running the float32 `model.onnx` on CPU EP.
- The actual outputs are generated by running the QDQ `model.qdq.onnx` on QNN EP.
- Accuracy results (SNR) are dumped to results_0.csv.
- Uses the `-c` command-line option to disable fallback to CPU EP (i.e., entire graph runs on QNN EP).
- Note: can also use the `-s` or `-l` command-line options to save or load the expected outputs as demonstrated above.
```shell
-$ .\accuracy_test -e qnn "backend_path|QnnHtp.dll" -c "session.disable_cpu_ep_fallback|1" -o results_0.csv models
+$ .\accuracy_test -e qnn "backend_path|QnnHtp.dll" -c "session.disable_cpu_ep_fallback|1" -o results_0.csv --ep_model_name model.qdq.onnx models

[INFO]: Accuracy Results (CSV format):

@@ -249,7 +261,7 @@ model_a/test_data_set_2,16.812691432087745
Use the `-a` command-line option to compare subsequent runs with previous accuracy results (e.g., results_0.csv). This can help detect accuracy regressions.
```shell
-.\accuracy_test -a results_0.csv -e qnn "backend_path|QnnHtp.dll" -c "session.disable_cpu_ep_fallback|1" models
+.\accuracy_test -a results_0.csv -e qnn "backend_path|QnnHtp.dll" -c "session.disable_cpu_ep_fallback|1" --ep_model_name model.qdq.onnx models

[INFO]: Accuracy Results (CSV format):

@@ -270,3 +282,22 @@ model_a/test_data_set_0,16.640392603599537
[INFO]: 10/11 tests passed.
[INFO]: 1/11 tests failed.
```
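Conceptually, the `-a` comparison reads each `test_name,snr` line from the expected results file and flags tests whose current SNR falls noticeably below the expected value. A sketch under assumed helper names and an illustrative tolerance (the tool's actual pass/fail rule may differ):

```cpp
#include <optional>
#include <string>
#include <utility>

// Parse one accuracy CSV line, e.g. "model_a/test_data_set_0,17.64",
// into a (test name, SNR) pair. Returns std::nullopt on malformed input.
std::optional<std::pair<std::string, double>> ParseResultLine(const std::string& line) {
  const std::size_t comma = line.rfind(',');
  if (comma == std::string::npos || comma + 1 >= line.size()) return std::nullopt;
  try {
    return std::make_pair(line.substr(0, comma), std::stod(line.substr(comma + 1)));
  } catch (const std::exception&) {
    return std::nullopt;  // SNR field was not a number
  }
}

// A test regresses when its current SNR drops below the expected SNR
// by more than a tolerance (tolerance_db is illustrative).
bool IsRegression(double actual_snr, double expected_snr, double tolerance_db) {
  return actual_snr < expected_snr - tolerance_db;
}
```

Applying this per line to the expected file and the current run reproduces the `N/M tests passed` summary shown above.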
### Measure accuracy of model with "plugin" EP
- Assumes the models/ directory contains all models to test.
- Assumes each individual model directory has a `model.onnx` file.
- The expected outputs are generated by running the float32 `model.onnx` on CPU EP.
- The actual outputs are generated by running the same `model.onnx` on the plugin EP.
- Accuracy results (SNR) are dumped to stdout.
```shell
$ .\accuracy_test -e plugin outTreeEP outTreeEP.dll "key1|val1 key2|val2" models

[INFO]: Accuracy Results (CSV format):

model_a/test_data_set_0,17.640392603599537
model_a/test_data_set_1,21.326599488217347
model_a/test_data_set_2,16.712691432087745
...
```
21 changes: 5 additions & 16 deletions c_cxx/accuracy_tool/src/accuracy_tester.cc
@@ -83,29 +83,18 @@ bool RunAccuracyTest(Ort::Env& env, const AppArgs& app_args) {
std::cout << "[INFO]: Testing model " << model_name << " (" << dataset_paths.size() << " datasets) ... "
<< std::endl;

-std::filesystem::path base_model_path = model_dir_path / "model.onnx";
-std::filesystem::path ep_model_path;
-
-// Determine which model will be used by the EP under test.
-// Some EPs will need to use a QDQ model instead of the the original model.
-if (app_args.uses_qdq_model) {
-std::filesystem::path qdq_model_path = model_dir_path / "model.qdq.onnx";
-
-if (!std::filesystem::is_regular_file(qdq_model_path)) {
-std::cerr << "[ERROR]: Execution provider '" << app_args.execution_provider << "' requires a QDQ model."
-<< std::endl;
-return false;
-}
-ep_model_path = std::move(qdq_model_path);
-} else {
-ep_model_path = base_model_path;
+std::filesystem::path ep_model_path = model_dir_path / app_args.ep_model_name;
+if (!std::filesystem::is_regular_file(ep_model_path)) {
+std::cerr << "[ERROR]: Cannot find ONNX model " << ep_model_path << " with which to test the EP." << std::endl;
+return false;
+}

std::vector<std::unique_ptr<char[]>> all_inputs;
std::vector<std::unique_ptr<char[]>> all_outputs;

// Load expected outputs from base model running on CPU EP (unless user wants to use outputs from disk).
if (!app_args.load_expected_outputs_from_disk) {
+std::filesystem::path base_model_path = model_dir_path / app_args.ground_truth_model_name;
if (!std::filesystem::is_regular_file(base_model_path)) {
std::cerr << "[ERROR]: Cannot find ONNX model " << base_model_path << " from which to get expected outputs."
<< std::endl;
65 changes: 52 additions & 13 deletions c_cxx/accuracy_tool/src/cmd_args.cc
@@ -13,6 +13,7 @@
#include <thread>
#include <unordered_set>

+#include "ep_cmd_args/plugin_cmd_args.h"
#include "ep_cmd_args/qnn_cmd_args.h"

void PrintUsage(std::ostream& stream, std::string_view prog_name) {
@@ -26,34 +27,45 @@ void PrintUsage(std::ostream& stream, std::string_view prog_name) {
stream << " Defaults to false." << std::endl;
stream << " -s/--save_expected_outputs Save outputs from baseline model on CPU EP to disk as " << std::endl;
stream << " output_<index>.raw files. Defaults to false." << std::endl;
-stream << " -e/--execution_provider ep [EP_ARGS] The execution provider to test (e.g., qnn or cpu)" << std::endl;
+stream << " -e/--execution_provider ep [EP_ARGS] The execution provider to test (e.g., qnn, cpu, or plugin)"
+<< std::endl;
stream << " Defaults to the CPU execution provider." << std::endl;
stream << " -c/--session_configs \"<key1>|<val1> <key2>|<val2>\" Session configuration options for EP under test."
<< std::endl;
stream << " Refer to onnxruntime_session_options_config_keys.h"
<< std::endl;
stream << " -o/--output_file path The output file into which to save accuracy results" << std::endl;
stream << " -a/--expected_accuracy_file path The file containing expected accuracy results" << std::endl;
+stream << " --ep_model_name onnx_model_name The name of the ONNX model to test for EP." << std::endl;
+stream << " Defaults to 'model.onnx'." << std::endl;
+stream << " --ground_truth_model_name onnx_model_name The name of the ONNX model used to get" << std::endl;
+stream << " expected output with CPU EP." << std::endl;
+stream << " Not used if expected outputs are" << std::endl;
+stream << " loaded from file. Defaults to 'model.onnx'." << std::endl;
stream << " --model model_name Model to test. Option can be specified multiple times."
<< std::endl;
stream << " By default, all found models are tested." << std::endl;
stream << std::endl;
-stream << "[EP_ARGS]: Specify EP-specific runtime options as key value pairs." << std::endl;
-stream << " Example: -e <provider_name> \"<key1>|<val1> <key2>|<val2>\"" << std::endl;
-stream << " [QNN only] [backend_path]: QNN backend path (e.g., 'C:\\Path\\QnnHtp.dll')" << std::endl;
-stream << " [QNN only] [profiling_level]: QNN profiling level, options: 'basic', 'detailed'," << std::endl;
+stream << "[EP_ARGS]: Specify EP-specific options." << std::endl;
+stream << " CPU EP: -e cpu" << std::endl;
+stream << " QNN EP: -e qnn \"<key1>|<val1> <key2>|<val2>\"" << std::endl;
+stream << " Valid QNN key/val pairs:" << std::endl;
+stream << " [backend_path]: QNN backend path (e.g., 'C:\\Path\\QnnHtp.dll')" << std::endl;
+stream << " [profiling_level]: QNN profiling level, options: 'basic', 'detailed'," << std::endl;
stream << " default 'off'." << std::endl;
-stream << " [QNN only] [rpc_control_latency]: QNN rpc control latency. default to 10." << std::endl;
-stream << " [QNN only] [vtcm_mb]: QNN VTCM size in MB. default to 0 (not set)." << std::endl;
-stream << " [QNN only] [htp_performance_mode]: QNN performance mode, options: 'burst', 'balanced', " << std::endl;
+stream << " [rpc_control_latency]: QNN rpc control latency. default to 10." << std::endl;
+stream << " [vtcm_mb]: QNN VTCM size in MB. default to 0 (not set)." << std::endl;
+stream << " [htp_performance_mode]: QNN performance mode, options: 'burst', 'balanced', " << std::endl;
stream << " 'default', 'high_performance', 'high_power_saver'," << std::endl;
stream << " 'low_balanced', 'low_power_saver', 'power_saver'," << std::endl;
stream << " 'sustained_high_performance'. Defaults to 'default'." << std::endl;
-stream << " [QNN only] [qnn_context_priority]: QNN context priority, options: 'low', 'normal'," << std::endl;
+stream << " [qnn_context_priority]: QNN context priority, options: 'low', 'normal'," << std::endl;
stream << " 'normal_high', 'high'. Defaults to 'normal'." << std::endl;
-stream << " [QNN only] [qnn_saver_path]: QNN Saver backend path. e.g 'C:\\Path\\QnnSaver.dll'." << std::endl;
-stream << " [QNN only] [htp_graph_finalization_optimization_mode]: QNN graph finalization" << std::endl;
+stream << " [qnn_saver_path]: QNN Saver backend path. e.g., 'C:\\Path\\QnnSaver.dll'." << std::endl;
+stream << " [htp_graph_finalization_optimization_mode]: QNN graph finalization" << std::endl;
stream << " optimization mode, options: '0', '1', '2', '3'. Default is '0'." << std::endl;
+stream << " Plugin EP: -e plugin <ep_name> <plugin_library_path> \"<key1>|<val1> <key2>|<val2>\"" << std::endl;
+stream << " All key/value pairs are considered session options." << std::endl;
}

static bool ParseSessionConfigs(const std::string& configs_string,
@@ -90,7 +102,6 @@ }
}

static void SetDefaultCpuEpArgs(AppArgs& app_args) {
-app_args.uses_qdq_model = true; // TODO: Make configurable?
app_args.supports_multithread_inference = true;
app_args.execution_provider = "cpu";
}
@@ -129,7 +140,7 @@ bool GetValidPath(std::string_view prog_name, std::string_view provided_path, bo
return true;
}

-bool ParseCmdLineArgs(AppArgs& app_args, int argc, char** argv) {
+bool ParseCmdLineArgs(AppArgs& app_args, int argc, char** argv, Ort::Env& env) {
CmdArgParser cmd_args(argc, argv);
std::string_view prog_name = cmd_args.GetNext();

@@ -150,6 +161,22 @@ }
}

app_args.output_file = cmd_args.GetNext();
+} else if (arg == "--ep_model_name") {
+if (!cmd_args.HasNext()) {
+std::cerr << "[ERROR]: Must provide an argument after the " << arg << " option" << std::endl;
+PrintUsage(std::cerr, prog_name);
+return false;
+}
+
+app_args.ep_model_name = cmd_args.GetNext();
+} else if (arg == "--ground_truth_model_name") {
+if (!cmd_args.HasNext()) {
+std::cerr << "[ERROR]: Must provide an argument after the " << arg << " option" << std::endl;
+PrintUsage(std::cerr, prog_name);
+return false;
+}
+
+app_args.ground_truth_model_name = cmd_args.GetNext();
} else if (arg == "-j" || arg == "--num_threads") {
if (!cmd_args.HasNext()) {
std::cerr << "[ERROR]: Must provide an argument after the " << arg << " option" << std::endl;
@@ -199,6 +226,10 @@ }
}
} else if (arg == "cpu") {
SetDefaultCpuEpArgs(app_args);
+} else if (arg == "plugin") {
+if (!ParseEpPluginArgs(app_args, cmd_args, prog_name, env)) {
+return false;
+}
} else {
std::cerr << "[ERROR]: Unsupported execution provider: " << arg << std::endl;
PrintUsage(std::cerr, prog_name);
@@ -246,6 +277,14 @@ return false;
return false;
}

+if (app_args.ep_model_name.empty()) {
+app_args.ep_model_name = "model.onnx";
+}
+
+if (app_args.ground_truth_model_name.empty()) {
+app_args.ground_truth_model_name = "model.onnx";
+}

if (app_args.execution_provider.empty()) {
SetDefaultCpuEpArgs(app_args);
}