
Commit 35adba2: Ovep develop lnl 1.2 (microsoft#22424)
### Description
- Support OV 2024.4
- Refactor the tensor initialization check for external weights
- Support loading an OV config
- OVEP: fix tensor caching and fix accuracy issues
- Refactor the device memory implementation to make it more generic

### Motivation and Context
These changes are required to fix accuracy issues, support loading an OV
config, and support OV 2024.4.

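Below is a minimal sketch of how the new OV config loading could be exercised from the ONNX Runtime C++ API, assuming the option is exposed under the `load_config` key that backs the field added in this PR; the file names are placeholders.

```cpp
// Sketch: enable the OpenVINO EP and point it at an OV config file.
// "ov_config.json" and "model.onnx" are hypothetical; the expected JSON
// schema is {"DEVICE": {"PROPERTY": "VALUE", ...}} per device.
#include <onnxruntime_cxx_api.h>
#include <string>
#include <unordered_map>

int main() {
  Ort::Env env;
  Ort::SessionOptions session_options;
  std::unordered_map<std::string, std::string> ov_options{
      {"device_type", "AUTO:CPU,GPU"},   // devices after ':' are parsed individually
      {"load_config", "ov_config.json"}  // per-device OV properties to apply
  };
  session_options.AppendExecutionProvider_OpenVINO_V2(ov_options);
  Ort::Session session(env, ORT_TSTR("model.onnx"), session_options);
  return 0;
}
```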
---------

Co-authored-by: Eric Crawford <[email protected]>
Co-authored-by: saurabhkale17 <[email protected]>
Co-authored-by: Javier E. Martinez <[email protected]>
Co-authored-by: sfatimar <[email protected]>
Co-authored-by: ankitm3k <[email protected]>
Co-authored-by: Preetha Veeramalai <[email protected]>
Co-authored-by: n1harika <[email protected]>
Co-authored-by: jatinwadhwa921 <[email protected]>
9 people authored Oct 14, 2024
1 parent 9b1b4e5 commit 35adba2
Showing 23 changed files with 379 additions and 281 deletions.
1 change: 1 addition & 0 deletions cmake/CMakeLists.txt
@@ -1352,6 +1352,7 @@ if (onnxruntime_USE_OPENVINO)
add_definitions(-DUSE_OPENVINO=1)

if(onnxruntime_NPU_NO_FALLBACK)
add_definitions(-DOPENVINO_CONFIG_NPU=1)
add_definitions(-DOPENVINO_DISABLE_NPU_FALLBACK=1)
endif()

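For context, a sketch of how such compile definitions are typically consumed in the provider source; this is illustrative, not the committed code.

```cpp
// Illustrative only: compile-time gates corresponding to the definitions above.
#if defined(OPENVINO_CONFIG_NPU)
// Build was configured with NPU as the target device.
#endif
#if defined(OPENVINO_DISABLE_NPU_FALLBACK)
constexpr bool npu_cpu_fallback = false;  // never fall back from NPU to CPU
#else
constexpr bool npu_cpu_fallback = true;
#endif
```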
2 changes: 1 addition & 1 deletion cmake/onnxruntime_providers_openvino.cmake
@@ -37,7 +37,7 @@

source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_openvino_cc_srcs})
onnxruntime_add_shared_library_module(onnxruntime_providers_openvino ${onnxruntime_providers_openvino_cc_srcs} "${ONNXRUNTIME_ROOT}/core/dll/onnxruntime.rc")
onnxruntime_add_include_to_target(onnxruntime_providers_openvino onnxruntime_common onnx)
onnxruntime_add_include_to_target(onnxruntime_providers_openvino onnxruntime_common onnx nlohmann_json::nlohmann_json)
install(FILES ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/openvino/openvino_provider_factory.h
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/)
set_target_properties(onnxruntime_providers_openvino PROPERTIES CXX_STANDARD 20)
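The new nlohmann_json dependency supports the OV config loading added in this PR. A sketch of how the JSON file could be parsed into the `std::map<std::string, ov::AnyMap>` used by the backend, assuming string-valued properties and a `{"DEVICE": {"PROPERTY": "VALUE"}}` schema; `ParseLoadConfig` is a hypothetical helper, not the committed implementation.

```cpp
// Sketch: parse a per-device OV property file into the load_config map.
#include <fstream>
#include <map>
#include <string>
#include <nlohmann/json.hpp>
#include <openvino/openvino.hpp>

std::map<std::string, ov::AnyMap> ParseLoadConfig(const std::string& path) {
  std::ifstream file(path);
  nlohmann::json json_config;
  file >> json_config;  // throws nlohmann::json::parse_error on malformed input

  std::map<std::string, ov::AnyMap> target_config;
  for (const auto& [device, options] : json_config.items()) {
    ov::AnyMap any_map;
    for (const auto& [key, value] : options.items()) {
      any_map[key] = value.get<std::string>();  // properties assumed to be strings
    }
    target_config[device] = std::move(any_map);
  }
  return target_config;
}
```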
2 changes: 1 addition & 1 deletion include/onnxruntime/core/session/onnxruntime_c_api.h
@@ -645,7 +645,7 @@ typedef struct OrtOpenVINOProviderOptions {
* Valid settings are one of: "CPU_FP32", "CPU_FP16", "GPU_FP32", "GPU_FP16"
*/
const char* device_type;
unsigned char enable_npu_fast_compile; ///< 0 = disabled, nonzero = enabled
unsigned char enable_npu_fast_compile;
const char* device_id;
size_t num_of_threads; ///< 0 = Use default number of threads
const char* cache_dir; // path is set to empty by default
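For reference, this legacy C-style struct can still be populated and passed through the C++ wrapper; a minimal sketch, with placeholder field values.

```cpp
// Sketch: appending the OpenVINO EP via the legacy options struct above.
#include <onnxruntime_cxx_api.h>

int main() {
  Ort::SessionOptions session_options;
  OrtOpenVINOProviderOptions options{};
  options.device_type = "GPU_FP32";  // one of the values listed above
  options.num_of_threads = 0;        // 0 = use the default number of threads
  session_options.AppendExecutionProvider_OpenVINO(options);
  return 0;
}
```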
82 changes: 76 additions & 6 deletions onnxruntime/core/providers/openvino/backends/basic_backend.cc
@@ -83,7 +83,8 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_pr
subgraph_context_.subgraph_name);
ie_cnn_network_ = exe_network_.Get().get_runtime_model();
} else if (global_context_.export_ep_ctx_blob &&
hw_target.find("NPU") != std::string::npos) {
hw_target.find("NPU") != std::string::npos &&
!global_context_.has_external_weights) {
std::shared_ptr<ov::Model> ov_model;
{
const std::string model = model_proto->SerializeAsString();
@@ -93,7 +94,8 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_pr
ov_model = global_context_.ie_core.Get().read_model(model, ov::Tensor());
}
exe_network_ = OVExeNetwork(global_context_.ie_core.Get().compile_model(ov_model, hw_target, device_config));
} else if ((!subgraph_context_.has_dynamic_input_shape) &&
} else if (!global_context_.has_external_weights &&
(!subgraph_context_.has_dynamic_input_shape) &&
((hw_target.find("AUTO") == std::string::npos) ||
(global_context_.OpenVINO_Version.at(0) >= 2024 && global_context_.OpenVINO_Version.at(1) > 2))) {
// Optimized OV compile_model API is supported with AUTO from version 2024.3 and above
@@ -178,6 +180,74 @@ void BasicBackend::PopulateConfigValue(ov::AnyMap& device_config) {
}
#endif
}

if (!global_context_.load_config.empty()) {
const std::map<std::string, ov::AnyMap>& target_config = global_context_.load_config;

// Parse device types like "AUTO:CPU,GPU" and extract individual devices
auto parse_individual_devices = [&](const std::string& device_type) -> std::vector<std::string> {
std::vector<std::string> devices;
auto delimiter_pos = device_type.find(':');
if (delimiter_pos != std::string::npos) {
std::stringstream str_stream(device_type.substr(delimiter_pos + 1));
std::string device;
while (std::getline(str_stream, device, ',')) {
devices.emplace_back(device);
}
} else {
devices.emplace_back(device_type);
}
return devices;
};

// Check if a property is supported and mutable
auto is_supported_and_mutable = [&](const std::string& key,
const std::vector<ov::PropertyName>& supported_config) -> bool {
auto it = std::find_if(supported_config.begin(), supported_config.end(), [&](const ov::PropertyName& property) {
return property == key && property.is_mutable();
});
return it != supported_config.end();
};

// Set each property that is supported and mutable; otherwise log a warning and skip it
auto set_target_properties = [&](const std::string& device, const ov::AnyMap& config_options,
const std::vector<ov::PropertyName>& supported_properties) {
for (const auto& [key, value] : config_options) {
if (is_supported_and_mutable(key, supported_properties)) {
global_context_.ie_core.Get().set_property(device, ov::AnyMap{{key, value}});
} else {
LOGS_DEFAULT(WARNING) << "WARNING: Property \"" << key
<< "\" is either unsupported in current OpenVINO version"
<< " or property is immutable for target device \""
<< device << "\". Skipping setting this property.";
}
}
};

// Check if the device type is AUTO, HETERO, or MULTI
if (global_context_.device_type.find("AUTO") == 0 ||
global_context_.device_type.find("HETERO") == 0 ||
global_context_.device_type.find("MULTI") == 0) {
// Parse individual devices (e.g., "AUTO:CPU,GPU" -> ["CPU", "GPU"])
auto individual_devices = parse_individual_devices(global_context_.device_type);
// Set properties only for individual devices (e.g., "CPU", "GPU")
for (const std::string& device : individual_devices) {
if (target_config.count(device)) {
// Get supported properties for each individual device
auto device_properties = global_context_.ie_core.Get().get_property(device, ov::supported_properties);
// Set properties for the device
set_target_properties(device, target_config.at(device), device_properties);
}
}
} else {
if (target_config.count(global_context_.device_type)) {
auto supported_properties = global_context_.ie_core.Get().get_property(global_context_.device_type,
ov::supported_properties);
set_target_properties(global_context_.device_type,
target_config.at(global_context_.device_type), supported_properties);
}
}
}
}
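The device-string parsing above can be exercised in isolation; a standalone sketch with the same logic:

```cpp
// Standalone version of the parse_individual_devices lambda above.
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

std::vector<std::string> ParseIndividualDevices(const std::string& device_type) {
  std::vector<std::string> devices;
  auto delimiter_pos = device_type.find(':');
  if (delimiter_pos != std::string::npos) {
    // "AUTO:CPU,GPU" -> split the part after ':' on commas
    std::stringstream str_stream(device_type.substr(delimiter_pos + 1));
    std::string device;
    while (std::getline(str_stream, device, ',')) {
      devices.emplace_back(device);
    }
  } else {
    devices.emplace_back(device_type);  // plain "CPU", "GPU", ...
  }
  return devices;
}

int main() {
  for (const auto& d : ParseIndividualDevices("AUTO:CPU,GPU"))
    std::cout << d << '\n';  // prints "CPU" then "GPU"
  return 0;
}
```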

void BasicBackend::EnableCaching(ov::AnyMap& device_config) {
@@ -275,7 +345,7 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque
input_tensor_shape[tensor_iter] = *i;
tensor_iter += 1;
}
auto input = graph_input_info.at(input_idx);
const auto& input = graph_input_info.at(input_idx);
OVTensorPtr tensor_ptr;
// avoid input copies on the CPU device
if (global_context_.device_type.find("CPU") != std::string::npos) {
@@ -316,7 +386,7 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque
ort_ov_tensor_map[ort_tensor_key] = ov_tensor_data;

try {
infer_request->SetTensor(input_name, ov_tensor_data.tensor_ptr);
infer_request->SetTensor(std::move(input_name), ov_tensor_data.tensor_ptr);
} catch (const char* msg) {
ORT_THROW(msg);
}
@@ -354,14 +424,14 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque
if ((it == ort_ov_tensor_map.end()) ||
(it != ort_ov_tensor_map.end() && (it->second.ort_ptr != tensor.GetTensorRawData()))) {
ov_tensor_data_t ov_tensor_data;
auto output = graph_output_info.at(output_idx);
const auto& output = graph_output_info.at(output_idx);
ov_tensor_data.ort_ptr = tensor.GetTensorRawData();
ov_tensor_data.tensor_ptr = std::make_shared<ov::Tensor>(output.get_element_type(), output.get_shape(),
const_cast<void*>(tensor.GetTensorRawData()));
ort_ov_tensor_map[ort_tensor_key] = ov_tensor_data;

try {
infer_request->SetTensor(output_name, ov_tensor_data.tensor_ptr);
infer_request->SetTensor(std::move(output_name), ov_tensor_data.tensor_ptr);
} catch (const char* msg) {
ORT_THROW(msg);
}
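The binding above is zero-copy: the `ov::Tensor` is constructed over the ORT output buffer rather than copying it. A minimal sketch of that technique, with a local vector standing in for the ORT-owned memory:

```cpp
// Sketch: wrap a pre-allocated buffer in an ov::Tensor without copying.
#include <memory>
#include <vector>
#include <openvino/openvino.hpp>

int main() {
  std::vector<float> ort_buffer(2 * 3, 1.0f);  // stand-in for tensor.GetTensorRawData()
  auto tensor_ptr = std::make_shared<ov::Tensor>(ov::element::f32,
                                                 ov::Shape{2, 3},
                                                 ort_buffer.data());
  // infer_request->SetTensor(name, tensor_ptr) would now read/write ort_buffer directly.
  return 0;
}
```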
4 changes: 3 additions & 1 deletion onnxruntime/core/providers/openvino/contexts.h
@@ -4,6 +4,7 @@
#pragma once

#include <vector>
#include <map>
#include <unordered_map>
#include <string>
#include "core/providers/openvino/ov_interface.h"
@@ -15,18 +16,19 @@ namespace openvino_ep {
struct GlobalContext {
OVCore ie_core;
bool is_wholly_supported_graph = false;
bool enable_npu_fast_compile = false;
bool enable_opencl_throttling = false;
bool disable_dynamic_shapes = false;
bool ep_context_embed_mode = true;
bool export_ep_ctx_blob = false;
bool enable_qdq_optimizer = false;
bool disable_cpu_fallback = false;
bool has_external_weights = false;
size_t num_of_threads;
std::string device_type;
std::string precision_str;
std::string model_precision;
std::string cache_dir;
std::map<std::string, ov::AnyMap> load_config;
std::string model_priority = "DEFAULT";
int num_streams;
std::vector<bool> deviceAvailableList = {true, true, true, true, true, true, true, true};
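The new `load_config` field holds one `ov::AnyMap` of OV properties per device. For illustration, an equivalent in-memory literal; the property names are standard OpenVINO properties and the values are examples only:

```cpp
#include <map>
#include <string>
#include <openvino/openvino.hpp>

int main() {
  // Example shape of GlobalContext::load_config after parsing a config file.
  std::map<std::string, ov::AnyMap> load_config{
      {"CPU", ov::AnyMap{{"INFERENCE_NUM_THREADS", 4}}},
      {"GPU", ov::AnyMap{{"PERFORMANCE_HINT", "THROUGHPUT"}}},
  };
  return 0;
}
```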
onnxruntime/core/providers/openvino/openvino_execution_provider.cc
@@ -25,8 +25,8 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProv
global_context_ = std::make_unique<openvino_ep::GlobalContext>();
global_context_->device_type = info.device_type_;
global_context_->precision_str = info.precision_;
global_context_->enable_npu_fast_compile = info.enable_npu_fast_compile_;
global_context_->cache_dir = info.cache_dir_;
global_context_->load_config = info.load_config_;
global_context_->model_priority = info.model_priority_;
global_context_->num_streams = info.num_streams_;
global_context_->context = info.context_;
@@ -124,6 +124,7 @@ OpenVINOExecutionProvider::GetCapability(const GraphViewer& graph_viewer,
result = obj.Execute();

global_context_->is_wholly_supported_graph = obj.IsWhollySupportedGraph();
global_context_->has_external_weights = obj.HasExternalWeights();

return result;
}
onnxruntime/core/providers/openvino/openvino_execution_provider.h
@@ -79,8 +79,8 @@ static std::vector<std::string> parseDevices(const std::string& device_string,
struct OpenVINOExecutionProviderInfo {
std::string device_type_{""};
std::string precision_{""};
bool enable_npu_fast_compile_{false};
size_t num_of_threads_{0};
std::map<std::string, ov::AnyMap> load_config_{};
std::string cache_dir_{""};
std::string model_priority_{""};
int num_streams_{1};
@@ -94,16 +94,18 @@

OpenVINOExecutionProviderInfo() = delete;

explicit OpenVINOExecutionProviderInfo(const std::string& dev_type, const std::string& precision,
bool enable_npu_fast_compile, size_t num_of_threads,
const std::string& cache_dir, const std::string& model_priority,
int num_streams, void* context, bool enable_opencl_throttling,
explicit OpenVINOExecutionProviderInfo(std::string dev_type, const std::string& precision,
size_t num_of_threads,
const std::map<std::string, ov::AnyMap>& load_config,
const std::string& cache_dir,
const std::string& model_priority, int num_streams,
void* context, bool enable_opencl_throttling,
bool disable_dynamic_shapes, bool export_ep_ctx_blob,
bool enable_qdq_optimizer, bool disable_cpu_fallback,
bool so_epctx_embed_mode)
: precision_(std::move(precision)),
enable_npu_fast_compile_(enable_npu_fast_compile),
num_of_threads_(num_of_threads),
load_config_(std::move(load_config)),
cache_dir_(std::move(cache_dir)),
model_priority_(std::move(model_priority)),
num_streams_(num_streams),