
Commit

Merge pull request #427 from intel/reduced_model_proto_scope
Reduced model proto scope
sfatimar authored Aug 27, 2024
2 parents 86eb1f4 + bf7a698 commit 65c85b1
Showing 6 changed files with 25 additions and 20 deletions.
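
The diff below narrows the lifetime of the fused subgraph's ModelProto: BackendManager now builds it into a local std::unique_ptr, caches it in the model_proto_ member only when the model has dynamic input shapes (where it is needed again at run time to rewrite shapes), and passes it to the backends by unique_ptr reference so a backend can free it as soon as it has been serialized. Below is a minimal, self-contained C++ sketch of that ownership pattern; the types and names (ModelProto, Manager, MakeBackend, keep_proto) are illustrative stand-ins, not the actual ONNX Runtime classes.

#include <memory>
#include <string>
#include <utility>

// Illustrative stand-in for ONNX_NAMESPACE::ModelProto.
struct ModelProto {
  std::string SerializeAsString() const { return "serialized-model-bytes"; }
};

struct Manager {
  // Kept only when the proto is still needed later (dynamic input shapes).
  std::unique_ptr<ModelProto> model_proto_;

  // The backend receives the proto by unique_ptr reference so it can drop it
  // once the serialized bytes have been handed to the compiler.
  static void MakeBackend(std::unique_ptr<ModelProto>& proto, bool keep_proto) {
    const std::string serialized = proto->SerializeAsString();
    // ... read/compile the model from `serialized` ...
    if (!keep_proto) {
      proto.reset();  // free the proto right after serialization
    }
  }

  void Init(bool has_dynamic_input_shape) {
    auto model_proto = std::make_unique<ModelProto>();  // local owner
    if (has_dynamic_input_shape) {
      // Needed again at run time to rewrite input shapes, so cache it.
      model_proto_ = std::move(model_proto);
      MakeBackend(model_proto_, /*keep_proto=*/true);
    } else {
      MakeBackend(model_proto, /*keep_proto=*/false);
    }
  }
};
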
24 changes: 13 additions & 11 deletions onnxruntime/core/providers/openvino/backend_manager.cc
@@ -64,11 +64,15 @@ BackendManager::BackendManager(const GlobalContext& global_context,
i++;
}
subgraph_context_.subgraph_name = fused_node.Name();
- model_proto_ = GetModelProtoFromFusedNode(fused_node, subgraph, logger);
+ auto model_proto = GetModelProtoFromFusedNode(fused_node, subgraph, logger);
std::string device_type = openvino_ep::BackendManager::GetGlobalContext().device_type;

if (ModelHasSymbolicInputDims(subgraph)) {
subgraph_context_.has_dynamic_input_shape = true;

+ // Cache model_proto for all cases with dynamic shapes
+ model_proto_ = std::move(model_proto);

LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has symbolic input dims";
ORT_ENFORCE(!global_context_.enable_qdq_optimizer,
"QDQ stripping should not be enabled for models with dynamic input shapes. "
@@ -79,7 +83,7 @@ BackendManager::BackendManager(const GlobalContext& global_context,
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Starting backend initialization. "
<< "Creating backend Dynamic Shapes";
try {
- concrete_backend_ = BackendFactory::MakeBackend(*model_proto_,
+ concrete_backend_ = BackendFactory::MakeBackend(model_proto,
GetGlobalContext(),
subgraph_context_,
ep_ctx_handle_);
@@ -99,7 +103,7 @@ BackendManager::BackendManager(const GlobalContext& global_context,

// OV NPU plugin is supported with fallback to OV CPU upon compilation failures.
try {
- concrete_backend_ = BackendFactory::MakeBackend(*model_proto_,
+ concrete_backend_ = BackendFactory::MakeBackend(model_proto,
GetGlobalContext(),
subgraph_context_,
ep_ctx_handle_);
@@ -115,7 +119,7 @@ BackendManager::BackendManager(const GlobalContext& global_context,
GetGlobalContext().device_type = "CPU";
GetGlobalContext().precision_str = "FP32";
try {
- concrete_backend_ = BackendFactory::MakeBackend(*model_proto_,
+ concrete_backend_ = BackendFactory::MakeBackend(model_proto,
GetGlobalContext(),
subgraph_context_,
ep_ctx_handle_);
@@ -361,10 +365,10 @@ std::string MakeMapKeyString(const std::vector<std::vector<int64_t>>& shapes,
return key;
}

- std::shared_ptr<ONNX_NAMESPACE::ModelProto>
+ std::unique_ptr<ONNX_NAMESPACE::ModelProto>
BackendManager::ReWriteInputShapeInfo(const ONNX_NAMESPACE::ModelProto& model_proto,
const std::vector<std::vector<int64_t>>& input_shapes) {
- auto model_copy = std::shared_ptr<ONNX_NAMESPACE::ModelProto>(ONNX_NAMESPACE::ModelProto::Create());
+ auto model_copy = ONNX_NAMESPACE::ModelProto::Create();
std::string proto_str;
model_proto.SerializeToString(proto_str);
model_copy->ParseFromString(proto_str);
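
With ReWriteInputShapeInfo now returning a std::unique_ptr, the rewritten copy has exactly one owner (the Compute path that builds a backend for the concrete shapes) and is destroyed as soon as that backend exists. A rough sketch of the pattern, using a hypothetical FakeModelProto in place of the real protobuf type:

#include <cstdint>
#include <memory>
#include <string>
#include <vector>

// Hypothetical stand-in for ONNX_NAMESPACE::ModelProto.
struct FakeModelProto {
  std::string data;
  void SerializeToString(std::string& out) const { out = data; }
  void ParseFromString(const std::string& in) { data = in; }
};

// Copy the proto via a serialize/parse round trip, then patch in the concrete
// input shapes; returning unique_ptr ends the copy's lifetime with its caller.
std::unique_ptr<FakeModelProto>
RewriteWithConcreteShapes(const FakeModelProto& model_proto,
                          const std::vector<std::vector<int64_t>>& input_shapes) {
  auto model_copy = std::make_unique<FakeModelProto>();
  std::string proto_str;
  model_proto.SerializeToString(proto_str);
  model_copy->ParseFromString(proto_str);
  // ... overwrite each symbolic input dim with the value from input_shapes ...
  (void)input_shapes;
  return model_copy;
}
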
@@ -418,14 +422,12 @@ void BackendManager::Compute(OrtKernelContext* context) {
// if disable_dynamic_shapes is set to true then execution of dynamic model is done
// by rewriting the model to static shaped model at runtime based on input shape.
// disable_dynamic_shapes is always set to true for OV NPU plugin.
- bool use_dynamic_backend = true;
if (subgraph_context_.has_dynamic_input_shape &&
!GetGlobalContext().disable_dynamic_shapes &&
(GetGlobalContext().device_type.find("CPU") != std::string::npos ||
GetGlobalContext().device_type.find("GPU") != std::string::npos)) {
concrete_backend_->Infer(context);
- use_dynamic_backend = false;
- } else if (use_dynamic_backend && subgraph_context_.has_dynamic_input_shape) {
+ } else if (subgraph_context_.has_dynamic_input_shape) {
std::vector<std::vector<int64_t>> tensor_shapes = GetInputTensorShapes(ctx);
auto key = MakeMapKeyString(tensor_shapes, GetGlobalContext().device_type);
std::shared_ptr<IBackend> dynamic_backend;
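
When the model has dynamic input shapes but the dynamic backend cannot be used directly (for example, disable_dynamic_shapes is set, as it always is for the NPU plugin), Compute rewrites the model to the current concrete input shapes and caches one compiled backend per shape combination. A simplified sketch of that shape-keyed cache, with Backend, ShapeKey, and backends_ as placeholder names only:

#include <cstdint>
#include <map>
#include <memory>
#include <string>
#include <vector>

// Placeholder for a backend compiled for one concrete shape combination.
struct Backend {};

std::string ShapeKey(const std::string& device_type,
                     const std::vector<std::vector<int64_t>>& shapes) {
  std::string key = device_type;
  for (const auto& shape : shapes) {
    key += "|";
    for (int64_t d : shape) key += std::to_string(d) + ",";
  }
  return key;
}

std::map<std::string, std::shared_ptr<Backend>> backends_;

std::shared_ptr<Backend> GetOrCreateBackend(
    const std::string& device_type,
    const std::vector<std::vector<int64_t>>& input_shapes) {
  const std::string key = ShapeKey(device_type, input_shapes);
  auto it = backends_.find(key);
  if (it != backends_.end()) return it->second;  // reuse a backend built earlier
  auto backend = std::make_shared<Backend>();    // compile for these concrete shapes
  backends_.emplace(key, backend);
  return backend;
}
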
@@ -437,7 +439,7 @@
<< "Backend created for graph " << subgraph_context_.subgraph_name;
auto modelproto_with_concrete_shapes = ReWriteInputShapeInfo(*model_proto_, tensor_shapes);
try {
- dynamic_backend = BackendFactory::MakeBackend(*modelproto_with_concrete_shapes,
+ dynamic_backend = BackendFactory::MakeBackend(modelproto_with_concrete_shapes,
GetGlobalContext(),
subgraph_context_,
ep_ctx_handle_);
@@ -456,7 +458,7 @@
GetGlobalContext().precision_str = "FP32";
key = MakeMapKeyString(tensor_shapes, GetGlobalContext().device_type);
try {
- dynamic_backend = BackendFactory::MakeBackend(*modelproto_with_concrete_shapes,
+ dynamic_backend = BackendFactory::MakeBackend(modelproto_with_concrete_shapes,
GetGlobalContext(),
subgraph_context_,
ep_ctx_handle_);
2 changes: 1 addition & 1 deletion onnxruntime/core/providers/openvino/backend_manager.h
@@ -43,7 +43,7 @@ class BackendManager {
std::shared_ptr<ONNX_NAMESPACE::ModelProto>
ReWriteBatchDimWithOne(const ONNX_NAMESPACE::ModelProto& model_proto);

- std::shared_ptr<ONNX_NAMESPACE::ModelProto>
+ std::unique_ptr<ONNX_NAMESPACE::ModelProto>
ReWriteInputShapeInfo(const ONNX_NAMESPACE::ModelProto& model_proto,
const std::vector<std::vector<int64_t>>& input_shapes);

@@ -11,7 +11,7 @@ namespace onnxruntime {
namespace openvino_ep {

std::shared_ptr<IBackend>
- BackendFactory::MakeBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
+ BackendFactory::MakeBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_proto,
GlobalContext& global_context,
const SubGraphContext& subgraph_context,
EPCtxHandler& ep_ctx_handle) {
13 changes: 8 additions & 5 deletions onnxruntime/core/providers/openvino/backends/basic_backend.cc
@@ -20,7 +20,7 @@ namespace openvino_ep {

using namespace backend_utils;

- BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
+ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_proto,
GlobalContext& global_context,
const SubGraphContext& subgraph_context,
EPCtxHandler& ep_ctx_handle)
@@ -94,7 +94,10 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
hw_target.find("NPU") != std::string::npos) {
std::shared_ptr<ov::Model> ov_model;
{
- const std::string model = model_proto.SerializeAsString();
+ const std::string model = model_proto->SerializeAsString();
+ if (!subgraph_context.has_dynamic_input_shape) {
+   delete model_proto.release();
+ }
ov_model = global_context_.ie_core.Get().read_model(model, ov::Tensor());
}
exe_network_ = OVExeNetwork(global_context_.ie_core.Get().compile_model(ov_model, hw_target, device_config));
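
The `delete model_proto.release();` added here frees the ModelProto as soon as its serialized bytes exist, but only for static-shape models; dynamic-shape models still need the proto cached in BackendManager for later reshaping. Releasing and deleting is equivalent to calling reset() on the unique_ptr, as the small sketch below (with a placeholder Proto type) illustrates:

#include <memory>
#include <string>

// Placeholder type; the real code holds an ONNX ModelProto.
struct Proto {
  std::string SerializeAsString() const { return "bytes"; }
};

int main() {
  auto proto = std::make_unique<Proto>();
  const std::string serialized = proto->SerializeAsString();

  // `delete proto.release();` (as in the diff) and `proto.reset();` do the same
  // thing: destroy the owned object and leave the unique_ptr null, so the large
  // proto is gone before the model compilation that follows.
  proto.reset();

  // ... hand `serialized` to read_model()/compile_model() ...
  return 0;
}
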
@@ -103,19 +106,19 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
(global_context_.OpenVINO_Version.at(0) >= 2024 && global_context_.OpenVINO_Version.at(1) > 2))) {
// Optimized OV compile_model API is supported with AUTO from version 2024.3 and above
// Inputs with static dimenstions
- const std::string model = model_proto.SerializeAsString();
+ const std::string model = model_proto->SerializeAsString();
exe_network_ = global_context_.ie_core.CompileModel(model,
hw_target,
device_config,
subgraph_context_.subgraph_name);
} else { // For all other types use ov::Model Type
- ie_cnn_network_ = CreateOVModel(model_proto, global_context_, const_outputs_map_);
+ ie_cnn_network_ = CreateOVModel(*model_proto, global_context_, const_outputs_map_);
exe_network_ = global_context_.ie_core.CompileModel(
ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
}
#endif
} else { // Full graph is not supported
- ie_cnn_network_ = CreateOVModel(model_proto, global_context_, const_outputs_map_);
+ ie_cnn_network_ = CreateOVModel(*model_proto, global_context_, const_outputs_map_);
exe_network_ = global_context_.ie_core.CompileModel(
ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
}
@@ -23,7 +23,7 @@ namespace openvino_ep {
class InferRequestsQueue;
class BasicBackend : public IBackend {
public:
- BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
+ BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_proto,
GlobalContext& global_context,
const SubGraphContext& subgraph_context,
EPCtxHandler& ep_ctx_handle);
2 changes: 1 addition & 1 deletion onnxruntime/core/providers/openvino/ibackend.h
@@ -20,7 +20,7 @@ class IBackend {
class BackendFactory {
public:
static std::shared_ptr<IBackend>
- MakeBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
+ MakeBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_proto,
GlobalContext& global_context,
const SubGraphContext& subgraph_context,
EPCtxHandler& ctx_handle);
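
Passing the proto as std::unique_ptr<ModelProto>& in the factory and backend interfaces is a middle ground: the caller keeps ownership, but the callee may release the proto early, and the caller's pointer reflects that decision. A brief sketch of the trade-off, with illustrative names only, not the real ORT interface:

#include <memory>

struct ModelProto {};

// By unique_ptr reference: the callee can free the proto early; the caller's
// pointer becomes null if it does.
void MaybeConsume(std::unique_ptr<ModelProto>& proto, bool free_early) {
  // ... use *proto ...
  if (free_early) proto.reset();
}

// Passing by value (std::unique_ptr<ModelProto> proto) would force an
// unconditional ownership transfer, which does not fit the dynamic-shape case
// where the BackendManager still needs the proto after the backend is built.
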
