Skip to content

Commit

Permalink
Updated fix for setting precision with Auto Plugin
Browse files Browse the repository at this point in the history
Updated fix for setting cache with Auto Plugin
  • Loading branch information
sfatimar committed Jul 16, 2024
1 parent 3f9e669 commit 2bd7257
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 42 deletions.
33 changes: 18 additions & 15 deletions onnxruntime/core/providers/openvino/backends/basic_backend.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
PopulateConfigValue(device_config);

// Enable caching
EnableCaching();
EnableCaching(device_config);

// Setting OpenCL queue throttling for GPU
EnableGPUThrottling(device_config);
Expand Down Expand Up @@ -90,18 +90,15 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
device_config,
subgraph_context_.subgraph_name);
ie_cnn_network_ = exe_network_.Get().get_runtime_model();
} else if (!subgraph_context_.has_dynamic_input_shape &&
global_context_.onnx_model_path_name.find(".onnx") != std::string ::npos) {
// Inputs with static dimensions
std::string prec_str = (global_context_.precision_str != "ACCURACY") ? global_context_.precision_str : global_context_.model_precision;
exe_network_ = global_context_.ie_core.CompileModel(global_context_.onnx_model_path_name,
hw_target,
prec_str,
global_context_.cache_dir,
device_config,
subgraph_context_.subgraph_name);
ie_cnn_network_ = exe_network_.Get().get_runtime_model();
} else { // Inputs with dynamic dimensions
} else if ((hw_target=="GPU") || ((hw_target.find("AUTO:GPU") != std::string::npos) &&

Check warning on line 93 in onnxruntime/core/providers/openvino/backends/basic_backend.cc

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/core/providers/openvino/backends/basic_backend.cc#L93

If an else has a brace on one side, it should have it on both [readability/braces] [5]
Raw output
onnxruntime/core/providers/openvino/backends/basic_backend.cc:93:  If an else has a brace on one side, it should have it on both  [readability/braces] [5]

Check warning on line 93 in onnxruntime/core/providers/openvino/backends/basic_backend.cc

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/core/providers/openvino/backends/basic_backend.cc#L93

Missing spaces around == [whitespace/operators] [3]
Raw output
onnxruntime/core/providers/openvino/backends/basic_backend.cc:93:  Missing spaces around ==  [whitespace/operators] [3]
(global_context_.OpenVINO_Version.at(0) >= 2024 && global_context_.OpenVINO_Version.at(1) > 2)))
{

Check warning on line 95 in onnxruntime/core/providers/openvino/backends/basic_backend.cc

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/core/providers/openvino/backends/basic_backend.cc#L95

{ should almost always be at the end of the previous line [whitespace/braces] [4]
Raw output
onnxruntime/core/providers/openvino/backends/basic_backend.cc:95:  { should almost always be at the end of the previous line  [whitespace/braces] [4]
// Use ONNX Model Path with GPU and with AUTO:GPU only when version is more than 2024.2
exe_network_ = global_context_.ie_core.CompileModel(global_context.onnx_model_path_name,
hw_target,
device_config,
subgraph_context_.subgraph_name);
} else { //For all other types use ov::Model Type

Check warning on line 101 in onnxruntime/core/providers/openvino/backends/basic_backend.cc

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/core/providers/openvino/backends/basic_backend.cc#L101

Should have a space between // and comment [whitespace/comments] [4]
Raw output
onnxruntime/core/providers/openvino/backends/basic_backend.cc:101:  Should have a space between // and comment  [whitespace/comments] [4]
ie_cnn_network_ = CreateOVModel(model_proto, global_context_, const_outputs_map_);
exe_network_ = global_context_.ie_core.CompileModel(
ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
Expand Down Expand Up @@ -172,13 +169,19 @@ void BasicBackend::PopulateConfigValue(ov::AnyMap& device_config) {
}
}

void BasicBackend::EnableCaching() {
// Enable OpenVINO model caching when a cache directory has been configured.
// device_config: compile-time property map; mutated only for the AUTO:GPU
// device target, where CACHE_DIR is attached as a GPU sub-device property.
// No-op for embed-mode EPContext graphs, where cache_dir has no effect.
void BasicBackend::EnableCaching(ov::AnyMap& device_config) {
  // cache_dir argument has no effect when working with an embed-mode EPContext Graph
  if (is_ep_ctx_graph_) return;

  if (!global_context_.cache_dir.empty()) {
    LOGS_DEFAULT(INFO) << log_tag << "Enables Caching";
    if (global_context_.device_type.find("AUTO:GPU") != std::string::npos) {
      // For AUTO:GPU, pass CACHE_DIR as a secondary property scoped to the GPU
      // sub-device instead of setting it globally on the core — presumably the
      // global core setting is not forwarded through the AUTO plugin; confirm.
      std::pair<std::string, ov::Any> device_property;
      device_property = std::make_pair("CACHE_DIR", global_context_.cache_dir);
      device_config.emplace(ov::device::properties("GPU", device_property));
    } else {
      global_context_.ie_core.SetCache(global_context_.cache_dir);
    }
  }
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ class BasicBackend : public IBackend {
void PopulateCompiledDirectory(std::string, std::string&, std::string&, bool&);
bool ValidateSubgraph(std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map);
void PopulateConfigValue(ov::AnyMap& device_config);
void EnableCaching();
void EnableCaching(ov::AnyMap& device_config);
void EnableGPUThrottling(ov::AnyMap& device_config);
void EnableStreams();
void SetNumThreads(ov::AnyMap& device_config);
Expand Down
26 changes: 7 additions & 19 deletions onnxruntime/core/providers/openvino/ov_interface.cc
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ std::shared_ptr<OVNetwork> OVCore::ReadModel(const std::string& model, const std
OVExeNetwork OVCore::CompileModel(std::shared_ptr<const OVNetwork>& ie_cnn_network,
std::string& hw_target,
ov::AnyMap& device_config,
std::string name) {
const std::string& name) {
ov::CompiledModel obj;
try {
obj = oe.compile_model(ie_cnn_network, hw_target, device_config);
Expand All @@ -88,23 +88,13 @@ OVExeNetwork OVCore::CompileModel(std::shared_ptr<const OVNetwork>& ie_cnn_netwo
}
}

OVExeNetwork OVCore::CompileModel(const std::string onnx_model_path,
OVExeNetwork OVCore::CompileModel(const std::string& onnx_model_path,
std::string& hw_target,
std::string precision,
std::string cache_dir,
ov::AnyMap& device_config,
std::string name) {
const std::string& name) {
ov::CompiledModel obj;
try {
if (hw_target == "AUTO:GPU,CPU") {
obj = oe.compile_model(onnx_model_path,
"AUTO",
ov::device::priorities("GPU", "CPU"),
ov::device::properties("GPU", {ov::cache_dir(cache_dir),
ov::hint::inference_precision(precision)}));
} else {
obj = oe.compile_model(onnx_model_path, hw_target, device_config);
}
obj = oe.compile_model(onnx_model_path, hw_target, device_config);
#ifndef NDEBUG
printDebugInfo(obj);
#endif
Expand All @@ -120,7 +110,7 @@ OVExeNetwork OVCore::CompileModel(const std::string onnx_model_path,
OVExeNetwork OVCore::ImportModel(std::shared_ptr<std::istringstream> model_stream,
std::string& hw_target,
ov::AnyMap& device_config,
std::string name) {
const std::string& name) {
try {
auto obj = oe.import_model(*model_stream, hw_target, device_config);
#ifndef NDEBUG
Expand All @@ -135,10 +125,8 @@ OVExeNetwork OVCore::ImportModel(std::shared_ptr<std::istringstream> model_strea
}
}

void OVCore::SetCache(std::string cache_dir_path, std::string device_type) {
if (device_type != "AUTO:GPU,CPU") {
oe.set_property(ov::cache_dir(cache_dir_path));
}
// Set the cache directory globally on the underlying ov::Core so that
// subsequent model compilations can read/write cached compiled blobs.
void OVCore::SetCache(const std::string& cache_dir_path) {
oe.set_property(ov::cache_dir(cache_dir_path));
}

#ifdef IO_BUFFER_ENABLED
Expand Down
16 changes: 9 additions & 7 deletions onnxruntime/core/providers/openvino/ov_interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,27 +39,29 @@ class OVCore {
ov::Core oe;

public:
//OV Interface For Reading Model

Check warning on line 42 in onnxruntime/core/providers/openvino/ov_interface.h

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/core/providers/openvino/ov_interface.h#L42

Should have a space between // and comment [whitespace/comments] [4]
Raw output
onnxruntime/core/providers/openvino/ov_interface.h:42:  Should have a space between // and comment  [whitespace/comments] [4]
std::shared_ptr<OVNetwork> ReadModel(const std::string& model_stream, const std::string& model_path) const;
//OV Interface for Compiling OV Model Type

Check warning on line 44 in onnxruntime/core/providers/openvino/ov_interface.h

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/core/providers/openvino/ov_interface.h#L44

Should have a space between // and comment [whitespace/comments] [4]
Raw output
onnxruntime/core/providers/openvino/ov_interface.h:44:  Should have a space between // and comment  [whitespace/comments] [4]
OVExeNetwork CompileModel(std::shared_ptr<const OVNetwork>& ie_cnn_network,
std::string& hw_target,
ov::AnyMap& device_config,
std::string name);
OVExeNetwork CompileModel(const std::string onnx_model_path,
const std::string& name);
//OV Interface for Fast Compile

Check warning on line 49 in onnxruntime/core/providers/openvino/ov_interface.h

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/core/providers/openvino/ov_interface.h#L49

Should have a space between // and comment [whitespace/comments] [4]
Raw output
onnxruntime/core/providers/openvino/ov_interface.h:49:  Should have a space between // and comment  [whitespace/comments] [4]
OVExeNetwork CompileModel(const std::string& onnx_model_path,
std::string& hw_target,
std::string precision,
std::string cache_dir,
ov::AnyMap& device_config,
std::string name);
const std::string& name);
//OV Interface for Import model Stream

Check warning on line 54 in onnxruntime/core/providers/openvino/ov_interface.h

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/core/providers/openvino/ov_interface.h#L54

Should have a space between // and comment [whitespace/comments] [4]
Raw output
onnxruntime/core/providers/openvino/ov_interface.h:54:  Should have a space between // and comment  [whitespace/comments] [4]
OVExeNetwork ImportModel(std::shared_ptr<std::istringstream> model_stream,
std::string& hw_target,
ov::AnyMap& device_config,
std::string name);
const std::string& name);
#ifdef IO_BUFFER_ENABLED
OVExeNetwork CompileModel(std::shared_ptr<const OVNetwork>& model, OVRemoteContextPtr context, std::string& name);
OVExeNetwork ImportModel(std::shared_ptr<std::istringstream> model_stream, OVRemoteContextPtr context, std::string& name);
#endif
std::vector<std::string> GetAvailableDevices();
void SetCache(std::string cache_dir_path, std::string device_type);
void SetCache(const std::string& cache_dir_path);
ov::Core& Get() { return oe; }
void SetStreams(const std::string& device_type, int num_streams);
};
Expand Down

0 comments on commit 2bd7257

Please sign in to comment.