From 808d112bc455d39f965a992e36e1eaaf894f94ee Mon Sep 17 00:00:00 2001 From: "Javier E. Martinez" Date: Sun, 25 Aug 2024 23:13:22 -0700 Subject: [PATCH 1/3] Fix double EpCtx write to cache --- onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc b/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc index 49c009ac60178..ee9486a62ea37 100644 --- a/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc +++ b/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc @@ -88,7 +88,6 @@ Status EPCtxHandler::ExportEPCtxModel(const GraphViewer& graph_viewer, if (!model_proto->SerializeToOstream(epctx_onnx_model)) { return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to serialize model to file"); } - model_proto->SerializeToOstream(epctx_onnx_model); } LOGS_DEFAULT(VERBOSE) << "[OpenVINO EP] Export blob as EPContext Node"; From 792523185d156550165eab34c92184b25bf09267 Mon Sep 17 00:00:00 2001 From: "Javier E. Martinez" Date: Sun, 25 Aug 2024 23:22:09 -0700 Subject: [PATCH 2/3] Limit model_proto scope to before actual compile except for dynamic shape models. --- .../providers/openvino/backend_manager.cc | 21 +++++++++---------- .../core/providers/openvino/backend_manager.h | 2 +- .../openvino/backends/backend_factory.cc | 2 +- .../openvino/backends/basic_backend.cc | 13 +++++++----- .../openvino/backends/basic_backend.h | 2 +- .../core/providers/openvino/ibackend.h | 2 +- 6 files changed, 22 insertions(+), 20 deletions(-) diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc index 4d2e38022b66f..9e94f8d4e98c6 100644 --- a/onnxruntime/core/providers/openvino/backend_manager.cc +++ b/onnxruntime/core/providers/openvino/backend_manager.cc @@ -64,7 +64,7 @@ BackendManager::BackendManager(const GlobalContext& global_context, i++; } subgraph_context_.subgraph_name = fused_node.Name(); - model_proto_ = GetModelProtoFromFusedNode(fused_node, subgraph, logger); + auto model_proto = GetModelProtoFromFusedNode(fused_node, subgraph, logger); std::string device_type = openvino_ep::BackendManager::GetGlobalContext().device_type; if (ModelHasSymbolicInputDims(subgraph)) { @@ -79,10 +79,11 @@ BackendManager::BackendManager(const GlobalContext& global_context, LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Starting backend initialization. " << "Creating backend Dynamic Shapes"; try { - concrete_backend_ = BackendFactory::MakeBackend(*model_proto_, + concrete_backend_ = BackendFactory::MakeBackend(model_proto, GetGlobalContext(), subgraph_context_, ep_ctx_handle_); + model_proto_ = std::move(model_proto); } catch (std::string const& msg) { ORT_THROW(msg); } @@ -99,7 +100,7 @@ BackendManager::BackendManager(const GlobalContext& global_context, // OV NPU plugin is supported with fallback to OV CPU upon compilation failures. try { - concrete_backend_ = BackendFactory::MakeBackend(*model_proto_, + concrete_backend_ = BackendFactory::MakeBackend(model_proto, GetGlobalContext(), subgraph_context_, ep_ctx_handle_); @@ -115,7 +116,7 @@ BackendManager::BackendManager(const GlobalContext& global_context, GetGlobalContext().device_type = "CPU"; GetGlobalContext().precision_str = "FP32"; try { - concrete_backend_ = BackendFactory::MakeBackend(*model_proto_, + concrete_backend_ = BackendFactory::MakeBackend(model_proto, GetGlobalContext(), subgraph_context_, ep_ctx_handle_); @@ -361,10 +362,10 @@ std::string MakeMapKeyString(const std::vector>& shapes, return key; } -std::shared_ptr +std::unique_ptr BackendManager::ReWriteInputShapeInfo(const ONNX_NAMESPACE::ModelProto& model_proto, const std::vector>& input_shapes) { - auto model_copy = std::shared_ptr(ONNX_NAMESPACE::ModelProto::Create()); + auto model_copy = ONNX_NAMESPACE::ModelProto::Create(); std::string proto_str; model_proto.SerializeToString(proto_str); model_copy->ParseFromString(proto_str); @@ -418,14 +419,12 @@ void BackendManager::Compute(OrtKernelContext* context) { // if disable_dynamic_shapes is set to true then execution of dynamic model is done // by rewriting the model to static shaped model at runtime based on input shape. // disable_dynamic_shapes is always set to true for OV NPU plugin. - bool use_dynamic_backend = true; if (subgraph_context_.has_dynamic_input_shape && !GetGlobalContext().disable_dynamic_shapes && (GetGlobalContext().device_type.find("CPU") != std::string::npos || GetGlobalContext().device_type.find("GPU") != std::string::npos)) { concrete_backend_->Infer(context); - use_dynamic_backend = false; - } else if (use_dynamic_backend && subgraph_context_.has_dynamic_input_shape) { + } else if (subgraph_context_.has_dynamic_input_shape) { std::vector> tensor_shapes = GetInputTensorShapes(ctx); auto key = MakeMapKeyString(tensor_shapes, GetGlobalContext().device_type); std::shared_ptr dynamic_backend; @@ -437,7 +436,7 @@ void BackendManager::Compute(OrtKernelContext* context) { << "Backend created for graph " << subgraph_context_.subgraph_name; auto modelproto_with_concrete_shapes = ReWriteInputShapeInfo(*model_proto_, tensor_shapes); try { - dynamic_backend = BackendFactory::MakeBackend(*modelproto_with_concrete_shapes, + dynamic_backend = BackendFactory::MakeBackend(modelproto_with_concrete_shapes, GetGlobalContext(), subgraph_context_, ep_ctx_handle_); @@ -456,7 +455,7 @@ void BackendManager::Compute(OrtKernelContext* context) { GetGlobalContext().precision_str = "FP32"; key = MakeMapKeyString(tensor_shapes, GetGlobalContext().device_type); try { - dynamic_backend = BackendFactory::MakeBackend(*modelproto_with_concrete_shapes, + dynamic_backend = BackendFactory::MakeBackend(modelproto_with_concrete_shapes, GetGlobalContext(), subgraph_context_, ep_ctx_handle_); diff --git a/onnxruntime/core/providers/openvino/backend_manager.h b/onnxruntime/core/providers/openvino/backend_manager.h index 3e555baad3476..b9ff7a72372b3 100644 --- a/onnxruntime/core/providers/openvino/backend_manager.h +++ b/onnxruntime/core/providers/openvino/backend_manager.h @@ -43,7 +43,7 @@ class BackendManager { std::shared_ptr ReWriteBatchDimWithOne(const ONNX_NAMESPACE::ModelProto& model_proto); - std::shared_ptr + std::unique_ptr ReWriteInputShapeInfo(const ONNX_NAMESPACE::ModelProto& model_proto, const std::vector>& input_shapes); diff --git a/onnxruntime/core/providers/openvino/backends/backend_factory.cc b/onnxruntime/core/providers/openvino/backends/backend_factory.cc index ce7e1c9f7c2b4..b7e4aed6e7e18 100644 --- a/onnxruntime/core/providers/openvino/backends/backend_factory.cc +++ b/onnxruntime/core/providers/openvino/backends/backend_factory.cc @@ -11,7 +11,7 @@ namespace onnxruntime { namespace openvino_ep { std::shared_ptr -BackendFactory::MakeBackend(const ONNX_NAMESPACE::ModelProto& model_proto, +BackendFactory::MakeBackend(std::unique_ptr& model_proto, GlobalContext& global_context, const SubGraphContext& subgraph_context, EPCtxHandler& ep_ctx_handle) { diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.cc b/onnxruntime/core/providers/openvino/backends/basic_backend.cc index 0ee2926ce1fcc..d72aad3c72097 100644 --- a/onnxruntime/core/providers/openvino/backends/basic_backend.cc +++ b/onnxruntime/core/providers/openvino/backends/basic_backend.cc @@ -20,7 +20,7 @@ namespace openvino_ep { using namespace backend_utils; -BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto, +BasicBackend::BasicBackend(std::unique_ptr& model_proto, GlobalContext& global_context, const SubGraphContext& subgraph_context, EPCtxHandler& ep_ctx_handle) @@ -94,7 +94,10 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto, hw_target.find("NPU") != std::string::npos) { std::shared_ptr ov_model; { - const std::string model = model_proto.SerializeAsString(); + const std::string model = model_proto->SerializeAsString(); + if (!subgraph_context.has_dynamic_input_shape) { + delete model_proto.release(); + } ov_model = global_context_.ie_core.Get().read_model(model, ov::Tensor()); } exe_network_ = OVExeNetwork(global_context_.ie_core.Get().compile_model(ov_model, hw_target, device_config)); @@ -103,19 +106,19 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto, (global_context_.OpenVINO_Version.at(0) >= 2024 && global_context_.OpenVINO_Version.at(1) > 2))) { // Optimized OV compile_model API is supported with AUTO from version 2024.3 and above // Inputs with static dimenstions - const std::string model = model_proto.SerializeAsString(); + const std::string model = model_proto->SerializeAsString(); exe_network_ = global_context_.ie_core.CompileModel(model, hw_target, device_config, subgraph_context_.subgraph_name); } else { // For all other types use ov::Model Type - ie_cnn_network_ = CreateOVModel(model_proto, global_context_, const_outputs_map_); + ie_cnn_network_ = CreateOVModel(*model_proto, global_context_, const_outputs_map_); exe_network_ = global_context_.ie_core.CompileModel( ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name); } #endif } else { // Full graph is not supported - ie_cnn_network_ = CreateOVModel(model_proto, global_context_, const_outputs_map_); + ie_cnn_network_ = CreateOVModel(*model_proto, global_context_, const_outputs_map_); exe_network_ = global_context_.ie_core.CompileModel( ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name); } diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.h b/onnxruntime/core/providers/openvino/backends/basic_backend.h index bcd3161590ba0..cd242a06b27d4 100644 --- a/onnxruntime/core/providers/openvino/backends/basic_backend.h +++ b/onnxruntime/core/providers/openvino/backends/basic_backend.h @@ -23,7 +23,7 @@ namespace openvino_ep { class InferRequestsQueue; class BasicBackend : public IBackend { public: - BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto, + BasicBackend(std::unique_ptr& model_proto, GlobalContext& global_context, const SubGraphContext& subgraph_context, EPCtxHandler& ep_ctx_handle); diff --git a/onnxruntime/core/providers/openvino/ibackend.h b/onnxruntime/core/providers/openvino/ibackend.h index eb0d8e8823896..7a2d6f4e8cd69 100644 --- a/onnxruntime/core/providers/openvino/ibackend.h +++ b/onnxruntime/core/providers/openvino/ibackend.h @@ -20,7 +20,7 @@ class IBackend { class BackendFactory { public: static std::shared_ptr - MakeBackend(const ONNX_NAMESPACE::ModelProto& model_proto, + MakeBackend(std::unique_ptr& model_proto, GlobalContext& global_context, const SubGraphContext& subgraph_context, EPCtxHandler& ctx_handle); From bf7a6985345aabcf922b79ecd2923346320481df Mon Sep 17 00:00:00 2001 From: "Javier E. Martinez" Date: Mon, 26 Aug 2024 10:52:04 -0700 Subject: [PATCH 3/3] Dynamic shape fix for cached model_proto lifetime optimization --- onnxruntime/core/providers/openvino/backend_manager.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc index 9e94f8d4e98c6..cffc5b91864d2 100644 --- a/onnxruntime/core/providers/openvino/backend_manager.cc +++ b/onnxruntime/core/providers/openvino/backend_manager.cc @@ -69,6 +69,10 @@ BackendManager::BackendManager(const GlobalContext& global_context, if (ModelHasSymbolicInputDims(subgraph)) { subgraph_context_.has_dynamic_input_shape = true; + + // Cache model_proto for all cases with dynamic shapes + model_proto_ = std::move(model_proto); + LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has symbolic input dims"; ORT_ENFORCE(!global_context_.enable_qdq_optimizer, "QDQ stripping should not be enabled for models with dynamic input shapes. " @@ -83,7 +87,6 @@ BackendManager::BackendManager(const GlobalContext& global_context, GetGlobalContext(), subgraph_context_, ep_ctx_handle_); - model_proto_ = std::move(model_proto); } catch (std::string const& msg) { ORT_THROW(msg); }