Skip to content

Commit

Permalink
[CoreML ML Program] support accelerators selector
Browse files Browse the repository at this point in the history
  • Loading branch information
wejoncy committed Oct 10, 2024
1 parent 2bef89c commit 871ade4
Show file tree
Hide file tree
Showing 10 changed files with 78 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,6 @@
enum COREMLFlags {
COREML_FLAG_USE_NONE = 0x000,

// Using CPU only in CoreML EP, this may decrease the perf but will provide
// reference output value without precision loss, which is useful for validation
COREML_FLAG_USE_CPU_ONLY = 0x001,

// Enable CoreML EP on subgraph
COREML_FLAG_ENABLE_ON_SUBGRAPH = 0x002,

Expand All @@ -28,8 +24,14 @@ enum COREMLFlags {
// dynamic shapes. However, the performance may be negatively impacted if inputs have dynamic shapes.
COREML_FLAG_ONLY_ALLOW_STATIC_INPUT_SHAPES = 0x008,

// Using CPU only in CoreML EP, this may decrease the perf but will provide
// reference output value without precision loss, which is useful for validation
COREML_FLAG_USE_CPU_ONLY = 0x010,
COREML_FLAG_USE_CPUAndGPU = 0x020,
COREML_FLAG_USE_CPUAndNeuralEngine = 0x040,
COREML_FLAG_USE_UnitAll = 0x080,
// Create an MLProgram. By default it will create a NeuralNetwork model. Requires Core ML 5 or later.
COREML_FLAG_CREATE_MLPROGRAM = 0x010,
COREML_FLAG_CREATE_MLPROGRAM = 0x100,

// Keep COREML_FLAG_LAST at the end of the enum definition
// And assign the last COREMLFlag to it
Expand Down
15 changes: 9 additions & 6 deletions java/src/main/java/ai/onnxruntime/providers/CoreMLFlags.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,6 @@

/** Flags for the CoreML provider. */
public enum CoreMLFlags implements OrtFlags {
/**
* Use only the CPU, disables the GPU and Apple Neural Engine. Only recommended for developer
* usage as it significantly impacts performance.
*/
CPU_ONLY(1), // COREML_FLAG_USE_CPU_ONLY(0x001)
/** Enables CoreML on subgraphs. */
ENABLE_ON_SUBGRAPH(2), // COREML_FLAG_ENABLE_ON_SUBGRAPH(0x002)
/** Only enable usage of CoreML if the device has an Apple Neural Engine. */
Expand All @@ -21,11 +16,19 @@ public enum CoreMLFlags implements OrtFlags {
* have dynamic shapes.
*/
ONLY_ALLOW_STATIC_INPUT_SHAPES(8), // COREML_FLAG_ONLY_ALLOW_STATIC_INPUT_SHAPES(0x008)
/**
* Use only the CPU, disables the GPU and Apple Neural Engine. Only recommended for developer
* usage as it significantly impacts performance.
*/
CPU_ONLY(16), // COREML_FLAG_USE_CPU_ONLY(0x010)
CPU_And_GPU(32),// MLComputeUnitsCPUAndGPU(0x020)
CPU_And_NeuralEngine(64),// MLComputeUnitsCPUAndNeuralEngine(0x040)
Units_All(128),// MLComputeUnitsAll(0x080)
/**
* Create an MLProgram. By default it will create a NeuralNetwork model. Requires Core ML 5 or
* later.
*/
CREATE_MLPROGRAM(16); // COREML_FLAG_CREATE_MLPROGRAM(0x010)
CREATE_MLPROGRAM(256); // COREML_FLAG_CREATE_MLPROGRAM(0x100)

/** The native value of the enum. */
public final int value;
Expand Down
10 changes: 8 additions & 2 deletions js/common/lib/inference-session.ts
Original file line number Diff line number Diff line change
Expand Up @@ -315,11 +315,14 @@ export declare namespace InferenceSession {
* The bit flags for CoreML execution provider.
*
* ```
* COREML_FLAG_USE_CPU_ONLY = 0x001
* COREML_FLAG_ENABLE_ON_SUBGRAPH = 0x002
* COREML_FLAG_ONLY_ENABLE_DEVICE_WITH_ANE = 0x004
* COREML_FLAG_ONLY_ALLOW_STATIC_INPUT_SHAPES = 0x008
* COREML_FLAG_CREATE_MLPROGRAM = 0x010
* COREML_FLAG_USE_CPU_ONLY = 0x010
* COREML_FLAG_USE_CPUAndGPU = 0x020
* COREML_FLAG_USE_CPUAndNeuralEngine = 0x040
* COREML_FLAG_USE_UnitAll = 0x080
* COREML_FLAG_CREATE_MLPROGRAM = 0x100
* ```
*
* See include/onnxruntime/core/providers/coreml/coreml_provider_factory.h for more details.
Expand All @@ -333,6 +336,9 @@ export declare namespace InferenceSession {
* This setting is available only in ONNXRuntime (react-native).
*/
useCPUOnly?: boolean;
useCPUAndGPU?: boolean;
useCPUAndANE?: boolean;
useALL?: boolean;
/**
* Specify whether to enable CoreML EP on subgraph.
*
Expand Down
6 changes: 6 additions & 0 deletions js/react_native/ios/OnnxruntimeModule.mm
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,12 @@ - (NSDictionary*)run:(NSString*)url
if (useOptions) {
if ([[executionProvider objectForKey:@"useCPUOnly"] boolValue]) {
coreml_flags |= COREML_FLAG_USE_CPU_ONLY;
} else if ([[executionProvider objectForKey:@"useCPUAndGPU"] boolValue]) {
coreml_flags |= COREML_FLAG_USE_CPUAndGPU;
} else if ([[executionProvider objectForKey:@"useCPUAndANE"] boolValue]) {
coreml_flags |= COREML_FLAG_USE_CPUAndNeuralEngine;
} else if ([[executionProvider objectForKey:@"useALL"] boolValue]) {
coreml_flags |= COREML_FLAG_USE_UnitAll;
}
if ([[executionProvider objectForKey:@"enableOnSubgraph"] boolValue]) {
coreml_flags |= COREML_FLAG_ENABLE_ON_SUBGRAPH;
Expand Down
16 changes: 15 additions & 1 deletion objectivec/include/ort_coreml_execution_provider.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,24 @@ NS_ASSUME_NONNULL_BEGIN
@interface ORTCoreMLExecutionProviderOptions : NSObject

/**
* The bit flags for CoreML execution provider.
*
* ```
* COREML_FLAG_ENABLE_ON_SUBGRAPH = 0x002
* COREML_FLAG_ONLY_ENABLE_DEVICE_WITH_ANE = 0x004
* COREML_FLAG_ONLY_ALLOW_STATIC_INPUT_SHAPES = 0x008
* COREML_FLAG_USE_CPU_ONLY = 0x010
* COREML_FLAG_USE_CPUAndGPU = 0x020
* COREML_FLAG_USE_CPUAndNeuralEngine = 0x040
* COREML_FLAG_USE_UnitAll = 0x080
* COREML_FLAG_CREATE_MLPROGRAM = 0x100
* ```
* Whether the CoreML execution provider should run on CPU only.
*/
@property BOOL useCPUOnly;

@property BOOL useCPUAndGPU;
@property BOOL useCPUAndANE;
@property BOOL useALL;
/**
* Whether the CoreML execution provider is enabled on subgraphs.
*/
Expand Down
3 changes: 3 additions & 0 deletions objectivec/ort_coreml_execution_provider.mm
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ - (BOOL)appendCoreMLExecutionProviderWithOptions:(ORTCoreMLExecutionProviderOpti
try {
const uint32_t flags =
(options.useCPUOnly ? COREML_FLAG_USE_CPU_ONLY : 0) |
(options.useCPUAndGPU ? COREML_FLAG_USE_CPUAndGPU : 0) |
(options.useCPUAndANE ? COREML_FLAG_USE_CPUAndNeuralEngine : 0) |
(options.useALL ? COREML_FLAG_USE_UnitAll : 0) |
(options.enableOnSubgraphs ? COREML_FLAG_ENABLE_ON_SUBGRAPH : 0) |
(options.onlyEnableForDevicesWithANE ? COREML_FLAG_ONLY_ENABLE_DEVICE_WITH_ANE : 0) |
(options.onlyAllowStaticInputShapes ? COREML_FLAG_ONLY_ALLOW_STATIC_INPUT_SHAPES : 0) |
Expand Down
13 changes: 10 additions & 3 deletions onnxruntime/core/providers/coreml/model/model.mm
Original file line number Diff line number Diff line change
Expand Up @@ -395,9 +395,16 @@ Status Predict(const std::unordered_map<std::string, OnnxTensorData>& inputs,
compiled_model_path_ = [compileUrl path];

MLModelConfiguration* config = [[MLModelConfiguration alloc] init];
config.computeUnits = (coreml_flags_ & COREML_FLAG_USE_CPU_ONLY)
? MLComputeUnitsCPUOnly
: MLComputeUnitsAll;
if (coreml_flags_ & COREML_FLAG_USE_CPU_ONLY) {
config.computeUnits = MLComputeUnitsCPUOnly;
} else if (coreml_flags_ & COREML_FLAG_USE_CPUAndGPU) {
config.computeUnits = MLComputeUnitsCPUAndGPU;
} else if (coreml_flags_ & COREML_FLAG_USE_CPUAndNeuralEngine) {
config.computeUnits = MLComputeUnitsCPUAndNeuralEngine;
} else {
config.computeUnits = MLComputeUnitsAll;
}

model_ = [MLModel modelWithContentsOfURL:compileUrl configuration:config error:&error];

if (error != nil || model_ == nil) {
Expand Down
6 changes: 6 additions & 0 deletions onnxruntime/python/onnxruntime_pybind_state.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1219,6 +1219,12 @@ std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance(

if (flags_str.find("COREML_FLAG_USE_CPU_ONLY") != std::string::npos) {
coreml_flags |= COREMLFlags::COREML_FLAG_USE_CPU_ONLY;
} else if (flags_str.find("COREML_FLAG_USE_CPUAndGPU") != std::string::npos) {
coreml_flags |= COREMLFlags::COREML_FLAG_USE_CPUAndGPU;
} else if (flags_str.find("COREML_FLAG_USE_CPUAndNeuralEngine") != std::string::npos) {
coreml_flags |= COREMLFlags::COREML_FLAG_USE_CPUAndNeuralEngine;
} else if (flags_str.find("COREML_FLAG_USE_UnitAll") != std::string::npos) {
coreml_flags |= COREMLFlags::COREML_FLAG_USE_UnitAll;
}

if (flags_str.find("COREML_FLAG_ONLY_ALLOW_STATIC_INPUT_SHAPES") != std::string::npos) {
Expand Down
4 changes: 2 additions & 2 deletions onnxruntime/test/perftest/command_args_parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,8 @@ namespace perftest {
"\t [NNAPI only] [NNAPI_FLAG_CPU_ONLY]: Using CPU only in NNAPI EP.\n"
"\t [Example] [For NNAPI EP] -e nnapi -i \"NNAPI_FLAG_USE_FP16 NNAPI_FLAG_USE_NCHW NNAPI_FLAG_CPU_DISABLED\"\n"
"\n"
"\t [CoreML only] [COREML_FLAG_CREATE_MLPROGRAM]: Create an ML Program model instead of Neural Network.\n"
"\t [Example] [For CoreML EP] -e coreml -i \"COREML_FLAG_CREATE_MLPROGRAM\"\n"
"\t [CoreML only] [COREML_FLAG_CREATE_MLPROGRAM COREML_FLAG_USE_CPU_ONLY COREML_FLAG_USE_CPUAndGPU COREML_FLAG_USE_CPUAndNeuralEngine COREML_FLAG_USE_UnitAll]: Create an ML Program model instead of Neural Network.\n"
"\t [Example] [For CoreML EP] -e coreml -i \"COREML_FLAG_CREATE_MLPROGRAM COREML_FLAG_USE_UnitAll\", COREML_FLAG_USE_UnitAll by default\n"
"\n"
"\t [SNPE only] [runtime]: SNPE runtime, options: 'CPU', 'GPU', 'GPU_FLOAT16', 'DSP', 'AIP_FIXED_TF'. \n"
"\t [SNPE only] [priority]: execution priority, options: 'low', 'normal'. \n"
Expand Down
12 changes: 12 additions & 0 deletions onnxruntime/test/perftest/ort_test_session.cc
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,18 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)");
if (key == "COREML_FLAG_CREATE_MLPROGRAM") {
coreml_flags |= COREML_FLAG_CREATE_MLPROGRAM;
std::cout << "Enabling ML Program.\n";
} else if (key == "COREML_FLAG_USE_CPU_ONLY") {
coreml_flags |= COREML_FLAG_USE_CPU_ONLY;
std::cout << "CoreML enabled COREML_FLAG_USE_CPU_ONLY.\n";
} else if (key == "COREML_FLAG_USE_CPUAndGPU") {
coreml_flags |= COREML_FLAG_USE_CPUAndGPU;
std::cout << "CoreML enabled COREML_FLAG_USE_CPUAndGPU.\n";
} else if (key == "COREML_FLAG_USE_CPUAndNeuralEngine") {
coreml_flags |= COREML_FLAG_USE_CPUAndNeuralEngine;
std::cout << "CoreML enabled COREML_FLAG_USE_CPUAndNeuralEngine.\n";
} else if (key == "COREML_FLAG_USE_UnitAll") {
coreml_flags |= COREML_FLAG_USE_UnitAll;
std::cout << "CoreML enabled COREML_FLAG_USE_UnitAll.\n";
} else if (key.empty()) {
} else {
ORT_THROW(
Expand Down

0 comments on commit 871ade4

Please sign in to comment.