diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
index 2e9be26fb9920..ef208f59f63b0 100644
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@@ -1352,6 +1352,7 @@ if (onnxruntime_USE_OPENVINO)
   add_definitions(-DUSE_OPENVINO=1)

   if(onnxruntime_NPU_NO_FALLBACK)
+    add_definitions(-DOPENVINO_CONFIG_NPU=1)
     add_definitions(-DOPENVINO_DISABLE_NPU_FALLBACK=1)
   endif()

diff --git a/cmake/onnxruntime_providers_openvino.cmake b/cmake/onnxruntime_providers_openvino.cmake
index 2eb3611bae902..5dcee285a5b13 100644
--- a/cmake/onnxruntime_providers_openvino.cmake
+++ b/cmake/onnxruntime_providers_openvino.cmake
@@ -37,7 +37,7 @@
   source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_openvino_cc_srcs})
   onnxruntime_add_shared_library_module(onnxruntime_providers_openvino ${onnxruntime_providers_openvino_cc_srcs} "${ONNXRUNTIME_ROOT}/core/dll/onnxruntime.rc")
-  onnxruntime_add_include_to_target(onnxruntime_providers_openvino onnxruntime_common onnx)
+  onnxruntime_add_include_to_target(onnxruntime_providers_openvino onnxruntime_common onnx nlohmann_json::nlohmann_json)
   install(FILES ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/openvino/openvino_provider_factory.h
     DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/)
   set_target_properties(onnxruntime_providers_openvino PROPERTIES CXX_STANDARD 20)

diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h
index 9cd5d6169bb52..9e71997c1e442 100644
--- a/include/onnxruntime/core/session/onnxruntime_c_api.h
+++ b/include/onnxruntime/core/session/onnxruntime_c_api.h
@@ -645,7 +645,7 @@ typedef struct OrtOpenVINOProviderOptions {
   * Valid settings are one of: "CPU_FP32", "CPU_FP16", "GPU_FP32", "GPU_FP16"
   */
   const char* device_type;
-  unsigned char enable_npu_fast_compile;  ///< 0 = disabled, nonzero = enabled
+  unsigned char enable_npu_fast_compile;
   const char* device_id;
   size_t num_of_threads;  ///< 0 = Use default number of threads
   const char* cache_dir;  // path is set to empty by default

diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.cc b/onnxruntime/core/providers/openvino/backends/basic_backend.cc
index 71a02f076c8cc..8a1844544328c 100644
--- a/onnxruntime/core/providers/openvino/backends/basic_backend.cc
+++ b/onnxruntime/core/providers/openvino/backends/basic_backend.cc
@@ -83,7 +83,8 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_proto,
                                            subgraph_context_.subgraph_name);
     ie_cnn_network_ = exe_network_.Get().get_runtime_model();
   } else if (global_context_.export_ep_ctx_blob &&
-             hw_target.find("NPU") != std::string::npos) {
+             hw_target.find("NPU") != std::string::npos &&
+             !global_context_.has_external_weights) {
     std::shared_ptr<ov::Model> ov_model;
     {
       const std::string model = model_proto->SerializeAsString();
@@ -93,7 +94,8 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_proto,
       ov_model = global_context_.ie_core.Get().read_model(model, ov::Tensor());
     }
     exe_network_ = OVExeNetwork(global_context_.ie_core.Get().compile_model(ov_model, hw_target, device_config));
-  } else if ((!subgraph_context_.has_dynamic_input_shape) &&
+  } else if (!global_context_.has_external_weights &&
+             (!subgraph_context_.has_dynamic_input_shape) &&
              ((hw_target.find("AUTO") == std::string::npos) ||
               (global_context_.OpenVINO_Version.at(0) >= 2024 && global_context_.OpenVINO_Version.at(1) > 2))) {
     // Optimized OV compile_model API is supported with AUTO from version 2024.3 and above
@@ -178,6 +180,74 @@ void BasicBackend::PopulateConfigValue(ov::AnyMap& device_config) {
     }
#endif
  }
+
+  if (!global_context_.load_config.empty()) {
+    const std::map<std::string, ov::AnyMap>& target_config = global_context_.load_config;
+
+    // Parse device types like "AUTO:CPU,GPU" and extract individual devices
+    auto parse_individual_devices = [&](const std::string& device_type) -> std::vector<std::string> {
+      std::vector<std::string> devices;
+      auto delimiter_pos = device_type.find(':');
+      if (delimiter_pos != std::string::npos) {
+        std::stringstream str_stream(device_type.substr(delimiter_pos + 1));
+        std::string device;
+        while (std::getline(str_stream, device, ',')) {
+          devices.emplace_back(device);
+        }
+      } else {
+        devices.emplace_back(device_type);
+      }
+      return devices;
+    };
+
+    // Check if a property is supported and mutable
+    auto is_supported_and_mutable = [&](const std::string& key,
+                                        const std::vector<ov::PropertyName>& supported_config) -> bool {
+      auto it = std::find_if(supported_config.begin(), supported_config.end(), [&](const ov::PropertyName& property) {
+        return property == key && property.is_mutable();
+      });
+      return it != supported_config.end();
+    };
+
+    // Set properties if they are valid, else log a warning if the property is missing or immutable by skipping the same
+    auto set_target_properties = [&](const std::string& device, const ov::AnyMap& config_options,
+                                     const std::vector<ov::PropertyName>& supported_properties) {
+      for (const auto& [key, value] : config_options) {
+        if (is_supported_and_mutable(key, supported_properties)) {
+          global_context_.ie_core.Get().set_property(device, ov::AnyMap{{key, value}});
+        } else {
+          LOGS_DEFAULT(WARNING) << "WARNING: Property \"" << key
+                                << "\" is either unsupported in current OpenVINO version"
+                                << " or property is immutable for target device \""
+                                << device << "\". Skipping setting this property.";
+        }
+      }
+    };
+
+    // Check if the device type is AUTO, HETERO, or MULTI
+    if (global_context_.device_type.find("AUTO") == 0 ||
+        global_context_.device_type.find("HETERO") == 0 ||
+        global_context_.device_type.find("MULTI") == 0) {
+      // Parse individual devices (e.g., "AUTO:CPU,GPU" -> ["CPU", "GPU"])
+      auto individual_devices = parse_individual_devices(global_context_.device_type);
+      // Set properties only for individual devices (e.g., "CPU", "GPU")
+      for (const std::string& device : individual_devices) {
+        if (target_config.count(device)) {
+          // Get supported properties for each individual device
+          auto device_properties = global_context_.ie_core.Get().get_property(device, ov::supported_properties);
+          // Set properties for the device
+          set_target_properties(device, target_config.at(device), device_properties);
+        }
+      }
+    } else {
+      if (target_config.count(global_context_.device_type)) {
+        auto supported_properties = global_context_.ie_core.Get().get_property(global_context_.device_type,
+                                                                               ov::supported_properties);
+        set_target_properties(global_context_.device_type,
+                              target_config.at(global_context_.device_type), supported_properties);
+      }
+    }
+  }
 }

 void BasicBackend::EnableCaching(ov::AnyMap& device_config) {
@@ -275,7 +345,7 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferRequestPtr infer_request) {
         input_tensor_shape[tensor_iter] = *i;
         tensor_iter += 1;
       }
-      auto input = graph_input_info.at(input_idx);
+      const auto& input = graph_input_info.at(input_idx);
       OVTensorPtr tensor_ptr;
       // avoid input copies on the CPU device
       if (global_context_.device_type.find("CPU") != std::string::npos) {
@@ -316,7 +386,7 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferRequestPtr infer_request) {
         ort_ov_tensor_map[ort_tensor_key] = ov_tensor_data;

         try {
-          infer_request->SetTensor(input_name, ov_tensor_data.tensor_ptr);
+          infer_request->SetTensor(std::move(input_name), ov_tensor_data.tensor_ptr);
         } catch (const char* msg) {
           ORT_THROW(msg);
         }
@@ -354,14 +424,14 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferRequestPtr infer_request) {
       if ((it == ort_ov_tensor_map.end()) ||
           (it != ort_ov_tensor_map.end() && (it->second.ort_ptr != tensor.GetTensorRawData()))) {
         ov_tensor_data_t ov_tensor_data;
-        auto output = graph_output_info.at(output_idx);
+        const auto& output = graph_output_info.at(output_idx);
         ov_tensor_data.ort_ptr = tensor.GetTensorRawData();
         ov_tensor_data.tensor_ptr = std::make_shared<ov::Tensor>(output.get_element_type(), output.get_shape(),
                                                                  const_cast<void*>(tensor.GetTensorRawData()));
         ort_ov_tensor_map[ort_tensor_key] = ov_tensor_data;

         try {
-          infer_request->SetTensor(output_name, ov_tensor_data.tensor_ptr);
+          infer_request->SetTensor(std::move(output_name), ov_tensor_data.tensor_ptr);
         } catch (const char* msg) {
           ORT_THROW(msg);
         }

diff --git a/onnxruntime/core/providers/openvino/contexts.h b/onnxruntime/core/providers/openvino/contexts.h
index 598e985676f8d..a2f4b236213cc 100644
--- a/onnxruntime/core/providers/openvino/contexts.h
+++ b/onnxruntime/core/providers/openvino/contexts.h
@@ -4,6 +4,7 @@
 #pragma once

 #include <vector>
+#include <map>
 #include <string>
 #include "core/providers/openvino/ov_interface.h"
@@ -15,18 +16,19 @@ namespace openvino_ep {
 struct GlobalContext {
   OVCore ie_core;
   bool is_wholly_supported_graph = false;
-  bool enable_npu_fast_compile = false;
   bool enable_opencl_throttling = false;
   bool disable_dynamic_shapes = false;
   bool ep_context_embed_mode = true;
   bool export_ep_ctx_blob = false;
   bool enable_qdq_optimizer = false;
   bool disable_cpu_fallback = false;
+  bool has_external_weights = false;
   size_t num_of_threads;
   std::string device_type;
   std::string precision_str;
   std::string model_precision;
   std::string cache_dir;
+  std::map<std::string, ov::AnyMap> load_config;
   std::string model_priority = "DEFAULT";
   int num_streams;
   std::vector<bool> deviceAvailableList = {true, true, true, true, true, true, true, true};

diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc
index 08144651319cf..19a634818a442 100644
--- a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc
+++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc
@@ -25,8 +25,8 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProviderInfo& info)
   global_context_ = std::make_unique<GlobalContext>();
   global_context_->device_type = info.device_type_;
   global_context_->precision_str = info.precision_;
-  global_context_->enable_npu_fast_compile = info.enable_npu_fast_compile_;
   global_context_->cache_dir = info.cache_dir_;
+  global_context_->load_config = info.load_config_;
   global_context_->model_priority = info.model_priority_;
   global_context_->num_streams = info.num_streams_;
   global_context_->context = info.context_;
@@ -124,6 +124,7 @@ OpenVINOExecutionProvider::GetCapability(const GraphViewer& graph_viewer,
     result = obj.Execute();

     global_context_->is_wholly_supported_graph = obj.IsWhollySupportedGraph();
+    global_context_->has_external_weights = obj.HasExternalWeights();

     return result;
   }

diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.h b/onnxruntime/core/providers/openvino/openvino_execution_provider.h
index 8b1c62c607f6e..7d9da65ea7e07 100644
--- a/onnxruntime/core/providers/openvino/openvino_execution_provider.h
+++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.h
@@ -79,8 +79,8 @@ static std::vector<std::string> parseDevices(const std::string& device_string,
 struct OpenVINOExecutionProviderInfo {
   std::string device_type_{""};
   std::string precision_{""};
-  bool enable_npu_fast_compile_{false};
   size_t num_of_threads_{0};
+  std::map<std::string, ov::AnyMap> load_config_{};
   std::string cache_dir_{""};
   std::string model_priority_{""};
   int num_streams_{1};
@@ -94,16 +94,18 @@ struct OpenVINOExecutionProviderInfo {

   OpenVINOExecutionProviderInfo() = delete;

-  explicit OpenVINOExecutionProviderInfo(const std::string& dev_type, const std::string& precision,
-                                         bool enable_npu_fast_compile, size_t num_of_threads,
-                                         const std::string& cache_dir, const std::string& model_priority,
-                                         int num_streams, void* context, bool enable_opencl_throttling,
+  explicit OpenVINOExecutionProviderInfo(std::string dev_type, const std::string& precision,
+                                         size_t num_of_threads,
+                                         const std::map<std::string, ov::AnyMap>& load_config,
+                                         const std::string& cache_dir,
+                                         const std::string& model_priority, int num_streams,
+                                         void* context, bool enable_opencl_throttling,
                                          bool disable_dynamic_shapes, bool export_ep_ctx_blob,
                                          bool enable_qdq_optimizer, bool disable_cpu_fallback,
                                          bool so_epctx_embed_mode)
       : precision_(std::move(precision)),
-        enable_npu_fast_compile_(enable_npu_fast_compile),
         num_of_threads_(num_of_threads),
+        load_config_(std::move(load_config)),
         cache_dir_(std::move(cache_dir)),
         model_priority_(std::move(model_priority)),
         num_streams_(num_streams),

diff --git a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc
index 077ecc717502f..b46106db3c232 100644
--- a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc
+++ b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc
@@ -1,65 +1,81 @@
 // Copyright (C) Intel Corporation
 // Licensed under the MIT License

+#include <map>
+#include <string>
 #include "core/providers/shared_library/provider_api.h"
 #include "core/providers/openvino/openvino_provider_factory.h"
 #include "core/providers/openvino/openvino_execution_provider.h"
 #include "core/providers/openvino/openvino_provider_factory_creator.h"
+#include "nlohmann/json.hpp"

 namespace onnxruntime {
 struct OpenVINOProviderFactory : IExecutionProviderFactory {
-  OpenVINOProviderFactory(const char* device_type, const char* precision,
-                          bool enable_npu_fast_compile, size_t num_of_threads,
-                          const char* cache_dir, const char* model_priority,
-                          int num_streams, void* context,
+  OpenVINOProviderFactory(const std::string& device_type, const std::string& precision,
+                          size_t num_of_threads,
+                          const std::map<std::string, ov::AnyMap>& load_config, const std::string& cache_dir,
+                          const std::string& model_priority, int num_streams, void* context,
                           bool enable_opencl_throttling, bool disable_dynamic_shapes,
-                          bool export_ep_ctx_blob, bool enable_qdq_optimizer,
-                          bool disable_cpu_fallback,
-                          bool so_epctx_embed_mode)
-      : precision_(precision),
-        enable_npu_fast_compile_(enable_npu_fast_compile),
+                          bool enable_qdq_optimizer, const ConfigOptions& config_options)
+      : device_type_(device_type),
+        precision_(precision),
         num_of_threads_(num_of_threads),
+        load_config_(load_config),
+        cache_dir_(cache_dir),
         model_priority_(model_priority),
         num_streams_(num_streams),
         context_(context),
         enable_opencl_throttling_(enable_opencl_throttling),
         disable_dynamic_shapes_(disable_dynamic_shapes),
-        export_ep_ctx_blob_(export_ep_ctx_blob),
         enable_qdq_optimizer_(enable_qdq_optimizer),
-        disable_cpu_fallback_(disable_cpu_fallback),
-        so_epctx_embed_mode_(so_epctx_embed_mode) {
-    device_type_ = (device_type == nullptr) ? "" : device_type;
-    cache_dir_ = (cache_dir == nullptr) ? "" : cache_dir;
-  }
+        config_options_(config_options) {}

-  ~OpenVINOProviderFactory() override {
-  }
+  ~OpenVINOProviderFactory() override {}

   std::unique_ptr<IExecutionProvider> CreateProvider() override;

 private:
   std::string device_type_;
   std::string precision_;
-  bool enable_npu_fast_compile_;
   size_t num_of_threads_;
+  const std::map<std::string, ov::AnyMap> load_config_;
   std::string cache_dir_;
   std::string model_priority_;
   int num_streams_;
   void* context_;
   bool enable_opencl_throttling_;
   bool disable_dynamic_shapes_;
-  bool export_ep_ctx_blob_;
   bool enable_qdq_optimizer_;
-  bool disable_cpu_fallback_;
-  bool so_epctx_embed_mode_;
+  const ConfigOptions& config_options_;
 };

 std::unique_ptr<IExecutionProvider> OpenVINOProviderFactory::CreateProvider() {
-  OpenVINOExecutionProviderInfo info(device_type_, precision_, enable_npu_fast_compile_, num_of_threads_,
+  bool so_disable_cpu_fallback = config_options_.GetConfigOrDefault("session.disable_cpu_ep_fallback", "0") == "1";
+  bool so_export_ep_ctx_blob = config_options_.GetConfigOrDefault("ep.context_enable", "0") == "1";
+  bool so_epctx_embed_mode = config_options_.GetConfigOrDefault("ep.context_embed_mode", "1") == "1";
+  std::string so_cache_path = config_options_.GetConfigOrDefault("ep.context_file_path", "").c_str();
+
+  if (so_export_ep_ctx_blob && !so_cache_path.empty()) {
+    cache_dir_ = so_cache_path;
+    auto file_path = std::filesystem::path(cache_dir_);
+    // ep_context_file_path_ file extension must be .onnx
+    if (file_path.extension().generic_string() == ".onnx") {
+      // ep_context_file_path_ must be provided as a directory, create it if doesn't exist
+      auto parent_path = file_path.parent_path();
+      if (!parent_path.empty() && !std::filesystem::is_directory(parent_path) &&
+          !std::filesystem::create_directory(parent_path)) {
+        ORT_THROW("[ERROR] [OpenVINO] Failed to create directory : " +
+                  file_path.parent_path().generic_string() + " \n");
+      }
+    } else {
+      ORT_THROW("[ERROR] [OpenVINO] Invalid ep_ctx_file_path" + cache_dir_ + " \n");
+    }
+  }
+
+  OpenVINOExecutionProviderInfo info(device_type_, precision_, num_of_threads_, load_config_,
                                      cache_dir_, model_priority_, num_streams_, context_, enable_opencl_throttling_,
-                                     disable_dynamic_shapes_, export_ep_ctx_blob_, enable_qdq_optimizer_,
-                                     disable_cpu_fallback_,
-                                     so_epctx_embed_mode_);
+                                     disable_dynamic_shapes_, so_export_ep_ctx_blob, enable_qdq_optimizer_,
+                                     so_disable_cpu_fallback, so_epctx_embed_mode);
   return std::make_unique<OpenVINOExecutionProvider>(info);
 }

@@ -77,41 +93,42 @@ struct OpenVINO_Provider : Provider {
   void* GetInfo() override { return &g_info; }

   std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory(const void* void_params) override {
-    auto& provider_options_map = *reinterpret_cast<const ProviderOptions*>(void_params);
-
-    std::string device_type = "";           // [device_type]: Overrides the accelerator hardware type and precision
-                                            // with these values at runtime.
-    std::string precision = "";             // [precision]: Sets the inference precision for execution.
-                                            // Supported precision for devices are CPU=FP32, GPU=FP32,FP16, NPU=FP16.
-                                            // Not setting precision will execute with optimized precision for
-                                            // best inference latency. set Precision=ACCURACY for executing models
-                                            // with input precision for best accuracy.
-    bool enable_npu_fast_compile = false;   // [enable_npu_fast_compile]: Fast-compile may be optionally enabled to
-                                            // speeds up the model's compilation to NPU device specific format.
-    int num_of_threads = 0;                 // [num_of_threads]: Overrides the accelerator default value of number of
-                                            // threads with this value at runtime.
-    std::string cache_dir = "";             // [cache_dir]: specify the path to
-                                            // dump and load the blobs for the model caching/kernel caching (GPU)
-                                            // feature. If blob files are already present, it will be directly loaded.
-    const char* model_priority = "DEFAULT";  // High-level OpenVINO model priority hint
-                                             // Defines what model should be provided with more performant
-                                             // bounded resource first
-    int num_streams = 1;                    // [num_streams]: Option that specifies the number of parallel inference
-                                            // requests to be processed on a given `device_type`. Overrides the
-                                            // accelerator default value of number of streams
-                                            // with this value at runtime.
-    bool enable_opencl_throttling = false;  // [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU
-                                            // device (Reduces CPU Utilization when using GPU)
-    bool export_ep_ctx_blob = false;        // Whether to export the pre-compiled blob as an EPContext model.
+    // Extract the void_params into ProviderOptions and ConfigOptions
+    typedef std::pair<const ProviderOptions*, const ConfigOptions&> ConfigBuffer;
+    const ConfigBuffer* buffer = reinterpret_cast<const ConfigBuffer*>(void_params);
+    auto& provider_options_map = *buffer->first;
+    const ConfigOptions& config_options = buffer->second;
+
+    std::string device_type = "";           // [device_type]: Overrides the accelerator hardware type and
+                                            // precision with these values at runtime.
+    std::string precision = "";             // [precision]: Sets the inference precision for execution.
+                                            // Supported precision for devices are
+                                            // CPU=FP32, GPU=FP32,FP16, NPU=FP16.
+                                            // Not setting precision will execute with optimized precision for
+                                            // best inference latency. set Precision=ACCURACY for executing
+                                            // models with input precision for best accuracy.
+    int num_of_threads = 0;                 // [num_of_threads]: Overrides the accelerator default value of
+                                            // number of threads with this value at runtime.
+    std::map<std::string, ov::AnyMap> load_config;  // JSON config map to load custom OV parameters.
+    std::string cache_dir = "";             // [cache_dir]: specify the path to
+                                            // dump and load the blobs for the model caching/kernel caching
+                                            // (GPU) feature. If blob files are already present,
+                                            // it will be directly loaded.
+    std::string model_priority = "DEFAULT";  // High-level OpenVINO model priority hint
+                                             // Defines what model should be provided with more performant
+                                             // bounded resource first
+    int num_streams = 1;                    // [num_streams]: Option that specifies the number of parallel
+                                            // inference requests to be processed on a given `device_type`.
+                                            // Overrides the accelerator default value of number of streams
+                                            // with this value at runtime.
+    bool enable_opencl_throttling = false;  // [enable_opencl_throttling]: Enables OpenCL queue throttling for
+                                            // GPU device (Reduces CPU Utilization when using GPU)
+
+    bool enable_qdq_optimizer = false;  // Enables QDQ pruning for efficient inference latency with NPU
     void* context = nullptr;
-    bool enable_qdq_optimizer = false;
-
-    bool disable_cpu_fallback = false;
-
-    bool so_epctx_embed_mode = true;
-
+    std::string bool_flag = "";

     if (provider_options_map.find("device_type") != provider_options_map.end()) {
       device_type = provider_options_map.at("device_type").c_str();
@@ -185,6 +202,68 @@ struct OpenVINO_Provider : Provider {
       cache_dir = provider_options_map.at("cache_dir");
     }

+    if (provider_options_map.find("load_config") != provider_options_map.end()) {
+      auto parse_config = [&](const std::string& config_str) -> std::map<std::string, ov::AnyMap> {
+        // If the config string is empty, return an empty map and skip processing
+        if (config_str.empty()) {
+          LOGS_DEFAULT(WARNING) << "Empty OV Config Map passed. Skipping load_config option parsing.\n";
+          return {};
+        }
+
+        std::stringstream input_str_stream(config_str);
+        std::map<std::string, ov::AnyMap> target_map;
+
+        try {
+          nlohmann::json json_config = nlohmann::json::parse(input_str_stream);
+
+          if (!json_config.is_object()) {
+            ORT_THROW("Invalid JSON structure: Expected an object at the root.");
+          }
+
+          for (auto& [key, value] : json_config.items()) {
+            ov::AnyMap inner_map;
+
+            // Ensure the key is one of "CPU", "GPU", or "NPU"
+            if (key != "CPU" && key != "GPU" && key != "NPU") {
+              LOGS_DEFAULT(WARNING) << "Unsupported device key: " << key << ". Skipping entry.\n";
+              continue;
+            }
+
+            // Ensure that the value for each device is an object (PROPERTY -> VALUE)
+            if (!value.is_object()) {
+              ORT_THROW("Invalid JSON structure: Expected an object for device properties.");
+            }
+
+            for (auto& [inner_key, inner_value] : value.items()) {
+              if (inner_value.is_string()) {
+                inner_map[inner_key] = inner_value.get<std::string>();
+              } else if (inner_value.is_number_integer()) {
+                inner_map[inner_key] = inner_value.get<int64_t>();
+              } else if (inner_value.is_number_float()) {
+                inner_map[inner_key] = inner_value.get<double>();
+              } else if (inner_value.is_boolean()) {
+                inner_map[inner_key] = inner_value.get<bool>();
+              } else {
+                LOGS_DEFAULT(WARNING) << "Unsupported JSON value type for key: " << inner_key << ". Skipping key.";
+              }
+            }
+            target_map[key] = inner_map;
+          }
+        } catch (const nlohmann::json::parse_error& e) {
+          // Handle syntax errors in JSON
+          ORT_THROW("JSON parsing error: " + std::string(e.what()));
+        } catch (const nlohmann::json::type_error& e) {
+          // Handle invalid type accesses
+          ORT_THROW("JSON type error: " + std::string(e.what()));
+        } catch (const std::exception& e) {
+          ORT_THROW("Error parsing load_config Map: " + std::string(e.what()));
+        }
+        return target_map;
+      };
+
+      load_config = parse_config(provider_options_map.at("load_config"));
+    }
+
     if (provider_options_map.find("context") != provider_options_map.end()) {
       std::string str = provider_options_map.at("context");
       uint64_t number = std::strtoull(str.c_str(), nullptr, 16);
@@ -224,16 +303,6 @@ struct OpenVINO_Provider : Provider {
                               << "Executing with num_streams=1";
       }
     }
-    std::string bool_flag = "";
-    if (provider_options_map.find("enable_npu_fast_compile") != provider_options_map.end()) {
-      bool_flag = provider_options_map.at("enable_npu_fast_compile");
-      if (bool_flag == "true" || bool_flag == "True")
-        enable_npu_fast_compile = true;
-      else if (bool_flag == "false" || bool_flag == "False")
-        enable_npu_fast_compile = false;
-      bool_flag = "";
-    }
-
     if (provider_options_map.find("enable_opencl_throttling") != provider_options_map.end()) {
       bool_flag = provider_options_map.at("enable_opencl_throttling");
       if (bool_flag == "true" || bool_flag == "True")
@@ -249,6 +318,8 @@ struct OpenVINO_Provider : Provider {
         enable_qdq_optimizer = true;
       else if (bool_flag == "false" || bool_flag == "False")
        enable_qdq_optimizer = false;
+      else
+        ORT_THROW("[ERROR] [OpenVINO-EP] enable_qdq_optimiser should be a boolean.\n");
       bool_flag = "";
     }
@@ -271,68 +342,21 @@ struct OpenVINO_Provider : Provider {
           disable_dynamic_shapes = false;
         }
       }
-    }
-    if (provider_options_map.find("so_export_ep_ctx_blob") != provider_options_map.end()) {
-      bool_flag = provider_options_map.at("so_export_ep_ctx_blob");
-      if (bool_flag == "true" || bool_flag == "True")
-        export_ep_ctx_blob = true;
-      else if (bool_flag == "false" || bool_flag == "False")
-        export_ep_ctx_blob = false;
-      bool_flag = "";
-    }
-
-    if (provider_options_map.find("disable_cpu_fallback") != provider_options_map.end()) {
-      bool_flag = provider_options_map.at("disable_cpu_fallback");
-      if (bool_flag == "true" || bool_flag == "True")
-        disable_cpu_fallback = true;
-      else if (bool_flag == "false" || bool_flag == "False")
-        disable_cpu_fallback = false;
-      bool_flag = "";
-    }
-    if (provider_options_map.find("so_epctx_embed_mode") != provider_options_map.end()) {
-      bool_flag = provider_options_map.at("so_epctx_embed_mode");
-      if (bool_flag == "true" || bool_flag == "True")
-        so_epctx_embed_mode = true;
-      else if (bool_flag == "false" || bool_flag == "False")
-        so_epctx_embed_mode = false;
       bool_flag = "";
     }

-    if (provider_options_map.find("so_epctx_path") != provider_options_map.end()) {
-      // The path to dump epctx model is valid only when epctx is enabled.
-      // Overrides the cache_dir option to dump model cache files from OV.
-      if (export_ep_ctx_blob &&
-          !provider_options_map.at("so_epctx_path").empty()) {
-        cache_dir = provider_options_map.at("so_epctx_path");
-        auto file_path = std::filesystem::path(cache_dir);
-        // ep_context_file_path_ file extension must be .onnx
-        if (file_path.extension().generic_string() == ".onnx") {
-          // ep_context_file_path_ must be provided as a directory, create it if doesn't exist
-          auto parent_path = file_path.parent_path();
-          if (!parent_path.empty() && !std::filesystem::is_directory(parent_path) &&
-              !std::filesystem::create_directory(parent_path)) {
-            ORT_THROW("[ERROR] [OpenVINO] Failed to create directory : " + file_path.parent_path().generic_string() + " \n");
-          }
-        } else {
-          ORT_THROW("[ERROR] [OpenVINO] Invalid ep_ctx_file_path" + cache_dir + " \n");
-        }
-      }
-    }
-
-    return std::make_shared<OpenVINOProviderFactory>(const_cast<char*>(device_type.c_str()),
-                                                     const_cast<char*>(precision.c_str()),
-                                                     enable_npu_fast_compile,
+    return std::make_shared<OpenVINOProviderFactory>(device_type,
+                                                     precision,
                                                      num_of_threads,
-                                                     const_cast<char*>(cache_dir.c_str()),
+                                                     load_config,
+                                                     cache_dir,
                                                      model_priority,
                                                      num_streams,
                                                      context,
                                                      enable_opencl_throttling,
                                                      disable_dynamic_shapes,
-                                                     export_ep_ctx_blob,
                                                      enable_qdq_optimizer,
-                                                     disable_cpu_fallback,
-                                                     so_epctx_embed_mode);
+                                                     config_options);
   }

   void Initialize() override {

diff --git a/onnxruntime/core/providers/openvino/openvino_provider_factory_creator.h b/onnxruntime/core/providers/openvino/openvino_provider_factory_creator.h
index bff70a90b6a70..0cbf051c6df26 100644
--- a/onnxruntime/core/providers/openvino/openvino_provider_factory_creator.h
+++ b/onnxruntime/core/providers/openvino/openvino_provider_factory_creator.h
@@ -14,8 +14,7 @@ namespace onnxruntime {
 struct SessionOptions;

 // defined in provider_bridge_ort.cc
 struct OpenVINOProviderFactoryCreator {
-  static std::shared_ptr<IExecutionProviderFactory> Create(ProviderOptions* provider_options_map,
+  static std::shared_ptr<IExecutionProviderFactory> Create(const ProviderOptions* provider_options_map,
                                                            const SessionOptions* session_options);
-  static std::shared_ptr<IExecutionProviderFactory> Create(const OrtOpenVINOProviderOptions* provider_options);
 };
 }  // namespace onnxruntime

diff --git a/onnxruntime/core/providers/openvino/ov_versions/capability.cc b/onnxruntime/core/providers/openvino/ov_versions/capability.cc
index 3fcaff4369c89..0d7ac64d86e68 100644
--- a/onnxruntime/core/providers/openvino/ov_versions/capability.cc
+++ b/onnxruntime/core/providers/openvino/ov_versions/capability.cc
@@ -35,16 +35,16 @@ GetCapability::GetCapability(const GraphViewer& graph_viewer_param,
     device_type_ = "CPU";
     if (enable_qdq_optimizer) npu_qdq_optimizer_enabled = true;
   }
-#if OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 0
-  data_ops_ = new DataOps(graph_viewer_, V_2024_0, device_type_, npu_qdq_optimizer_enabled);
-#elif OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 1
+#if OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 1
   data_ops_ = new DataOps(graph_viewer_, V_2024_1, device_type_, npu_qdq_optimizer_enabled);
 #elif OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 2
   data_ops_ = new DataOps(graph_viewer_, V_2024_2, device_type_, npu_qdq_optimizer_enabled);
 #elif OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 3
   data_ops_ = new DataOps(graph_viewer_, V_2024_3, device_type_, npu_qdq_optimizer_enabled);
+#elif OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 4
+  data_ops_ = new DataOps(graph_viewer_, V_2024_4, device_type_, npu_qdq_optimizer_enabled);
 #else
-  data_ops_ = new DataOps(graph_viewer_, V_2024_3, device_type_, npu_qdq_optimizer_enabled);
+  data_ops_ = new DataOps(graph_viewer_, V_2024_4, device_type_, npu_qdq_optimizer_enabled);
 #endif
 }

@@ -59,7 +59,7 @@ std::vector<std::unique_ptr<ComputeCapability>> GetCapability::Execute() {
   // This is a list of initializers that nGraph considers as constants. Example weights, reshape shape etc.
   std::unordered_set<std::string> ng_required_initializers;

-  const auto unsupported_nodes = data_ops_->GetUnsupportedNodeIndices(ng_required_initializers);
+  const auto unsupported_nodes = data_ops_->GetUnsupportedNodeIndices(ng_required_initializers, has_external_weights_);
 #ifndef NDEBUG
   if (openvino_ep::backend_utils::IsDebugEnabled()) {
     std::cout << "No of unsupported nodes " << unsupported_nodes.size() << std::endl;

diff --git a/onnxruntime/core/providers/openvino/ov_versions/capability.h b/onnxruntime/core/providers/openvino/ov_versions/capability.h
index 63c83158accf8..2f87c4c73d892 100644
--- a/onnxruntime/core/providers/openvino/ov_versions/capability.h
+++ b/onnxruntime/core/providers/openvino/ov_versions/capability.h
@@ -16,6 +16,7 @@ class GetCapability {
   std::string device_type_;
   DataOps* data_ops_;
   bool is_wholly_supported_graph_ = false;
+  bool has_external_weights_ = false;

  public:
   GetCapability(const GraphViewer& graph_viewer_param,
@@ -25,6 +26,9 @@ class GetCapability {
   bool IsWhollySupportedGraph() {
     return is_wholly_supported_graph_;
   }
+  bool HasExternalWeights() {
+    return has_external_weights_;
+  }
 };
 }  // namespace openvino_ep

diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
index d9aa13ec1bba9..e8f6ae0a43734 100644
--- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
+++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
@@ -281,6 +281,10 @@ void DataOps::populate_types_supported() {
       std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64));
   supported_types_npu_.insert(
       std::make_pair(V_2021_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16));
+  supported_types_npu_.insert(
+      std::make_pair(V_2024_3, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E4M3FN));
+  supported_types_npu_.insert(
+      std::make_pair(V_2024_3, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E4M3FNUZ));

   supported_types_cpu_.insert(
       std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
@@ -328,6 +332,7 @@ void DataOps::populate_op_mode_supported() {
   no_dimension_supported_.push_back({"Equal", V_2022_1, {"CPU"}});
   no_dimension_supported_.push_back({"Equal", V_2023_0, {"GPU"}});
   no_dimension_supported_.push_back({"Expand", V_2023_3, {"CPU"}});
+  no_dimension_supported_.push_back({"Expand", V_2024_3, {"CPU", "GPU"}});
   no_dimension_supported_.push_back({"Floor", V_2020_4, {"All"}});
   no_dimension_supported_.push_back({"Gather", V_2020_4, {"All"}});
   no_dimension_supported_.push_back({"Identity", V_2023_0, {"All"}});
@@ -363,7 +368,7 @@ void DataOps::populate_op_mode_supported() {

   // populate unsupportedmode_t
   {
-    UnsupportedOpMode obj = {{V_2024_1, V_2024_2, V_2024_3},
+    UnsupportedOpMode obj = {{V_2024_1, V_2024_2, V_2024_3, V_2024_4},
                              [this](const Node* node, const InitializedTensorSet&) {
                                // If the Input of ReduceMax op is UINT8, it is rejected (Due to output mismatch)
                                for (size_t i = 0; i < node->InputDefs().size(); i++) {
@@ -378,7 +383,7 @@ void DataOps::populate_op_mode_supported() {
     op_list_.insert({"ReduceMax", obj});
   }
   {
-    UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2, V_2024_3},
+    UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2, V_2024_3, V_2024_4},
                              [this](const Node* node, const InitializedTensorSet&) {
                                const auto& input_arg = node->InputDefs()[1];
                                auto shape = input_arg->Shape();
@@ -395,7 +400,7 @@ void DataOps::populate_op_mode_supported() {
     op_list_.insert({"Reshape", obj});
   }
   {
-    UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2, V_2024_3},
+    UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2, V_2024_3, V_2024_4},
                              [this](const Node* node, const InitializedTensorSet&) {
                                // If the operator is unsqueeze
                                // If axes is an input, then we cannot produce a static graph.
@@ -410,7 +415,7 @@ void DataOps::populate_op_mode_supported() {
     op_list_.insert({"Unsqueeze", obj});
   }
   {
-    UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2, V_2024_3},
+    UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2, V_2024_3, V_2024_4},
                              [this](const Node* node, const InitializedTensorSet&) {
                                // check for attributes
                                auto& upsample_attr = node->GetAttributes();
@@ -583,11 +588,21 @@ bool DataOps::type_is_supported(const NodeArg* node_arg, bool is_initializer) {
   }
 }

-bool DataOps::unsupported_op_mode(const Node* node) {
+bool DataOps::unsupported_op_mode(const Node* node, bool& has_external_weights_) {
   bool result = false;
   const auto& optype = node->OpType();
   const auto& initializers = graph_viewer_.GetAllInitializedTensors();

+  for (const auto& tensor_pair : initializers) {
+    const ONNX_NAMESPACE::TensorProto* tensor_proto = tensor_pair.second;
+    // Check if the tensor exists and if it has an external data location
+    if (tensor_proto && tensor_proto->has_data_location() &&
+        tensor_proto->data_location() == ONNX_NAMESPACE::TensorProto_DataLocation_EXTERNAL) {
+      has_external_weights_ = true;
+      break;
+    }
+  }
+
   auto iter = op_list_.equal_range(optype);
   for (auto it = iter.first; it != iter.second; ++it) {
     auto ob = it->second;
@@ -637,7 +652,7 @@ bool DataOps::dimension_unsupported(const Node* node) {
   return true;
 }

-bool DataOps::node_is_supported(const NodeIndex node_idx) {
+bool DataOps::node_is_supported(const NodeIndex node_idx, bool& has_external_weights_) {
   const auto& node = graph_viewer_.GetNode(node_idx);
   const auto& optype = node->OpType();

@@ -745,7 +760,7 @@ bool DataOps::node_is_supported(const NodeIndex node_idx) {
   }

   // Check 3a
-  if (domain == kOnnxDomain && unsupported_op_mode(node)) {
+  if (domain == kOnnxDomain && unsupported_op_mode(node, has_external_weights_)) {
     if (optype == "GatherElements") {
       return true;
     }
@@ -760,11 +775,12 @@ bool DataOps::node_is_supported(const NodeIndex node_idx) {
   return true;
 }

-std::vector<NodeIndex> DataOps::GetUnsupportedNodeIndices(std::unordered_set<std::string>& ng_required_initializers) {
+std::vector<NodeIndex> DataOps::GetUnsupportedNodeIndices(std::unordered_set<std::string>& ng_required_initializers,
+                                                          bool& has_external_weights_) {
   std::vector<NodeIndex> unsupported_nodes_idx;

   for (const auto& node_idx : graph_viewer_.GetNodesInTopologicalOrder()) {
-    if (node_is_supported(node_idx)) {
+    if (node_is_supported(node_idx, has_external_weights_)) {
       // Collect inputs that are initializers
       graph_viewer_.GetNode(node_idx)->ForEachDef([&ng_required_initializers, this](const NodeArg& node_arg,
                                                                                     bool is_input) {

diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.h b/onnxruntime/core/providers/openvino/ov_versions/data_ops.h
index 4c064b08405c1..5cd4c8658fb77 100644
--- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.h
+++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.h
@@ -30,7 +30,8 @@ enum versionNum {
   V_2024_0,
   V_2024_1,
   V_2024_2,
-  V_2024_3
+  V_2024_3,
+  V_2024_4
 };

 using VersionNum = enum versionNum;
@@ -70,9 +71,9 @@ class DataOps {
   void populate_types_supported();
   bool op_is_supported(std::string name, std::vector<SupportedOp>& list);
   bool dimension_unsupported(const Node* node);
-  bool unsupported_op_mode(const Node* node);
+  bool unsupported_op_mode(const Node* node, bool& has_external_weights_);
   bool type_is_supported(const NodeArg* node_arg, bool is_initializer);
-  bool node_is_supported(const NodeIndex node_idx);
+  bool node_is_supported(const NodeIndex node_idx, bool& has_external_weights_);

  public:
   DataOps(const GraphViewer& graph_viewer_param, VersionNum ver,
@@ -85,7 +86,8 @@ class DataOps {
     populate_types_supported();
   }

-  virtual std::vector<NodeIndex> GetUnsupportedNodeIndices(std::unordered_set<std::string>& ng_required_initializers);
+  virtual std::vector<NodeIndex> GetUnsupportedNodeIndices(
+      std::unordered_set<std::string>& ng_required_initializers, bool& has_external_weights_);
   virtual bool IsOpSupportedOnlyInModel(std::string name);
   virtual bool SpecialConditionForClusterSizeOne(
       std::unordered_set<std::string>& ng_required_initializers, const Node* node);

diff --git a/onnxruntime/core/providers/shared_library/provider_interfaces.h b/onnxruntime/core/providers/shared_library/provider_interfaces.h
index 34319287a80fd..3efc715fc3037 100644
--- a/onnxruntime/core/providers/shared_library/provider_interfaces.h
+++ b/onnxruntime/core/providers/shared_library/provider_interfaces.h
@@ -578,6 +578,8 @@ struct ProviderHost {

   // ConfigOptions
   virtual std::optional<std::string> ConfigOptions__GetConfigEntry(const ConfigOptions* p, const std::string& config_key) = 0;
+  virtual std::string ConfigOptions__GetConfigOrDefault(const ConfigOptions* p, const std::string& config_key,
+                                                        const std::string& default_value) = 0;

   // OrtRunOptions
   virtual const ConfigOptions& RunOptions__GetConfigOptions(const RunOptions* p) = 0;

diff --git a/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h b/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h
index 4644f703dcb5d..b9e0951a740a2 100644
--- a/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h
+++ b/onnxruntime/core/providers/shared_library/provider_wrappedtypes.h
@@ -485,6 +485,10 @@ struct ConfigOptions final {
     return g_host->ConfigOptions__GetConfigEntry(this, config_key);
   }

+  std::string GetConfigOrDefault(const std::string& config_key, const std::string& default_value) const {
+    return g_host->ConfigOptions__GetConfigOrDefault(this, config_key, default_value);
+  }
+
   PROVIDER_DISALLOW_ALL(ConfigOptions)
 };

diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc
index 85079ef78c8d3..2c4bffa4fb79f 100644
--- a/onnxruntime/core/session/provider_bridge_ort.cc
+++ b/onnxruntime/core/session/provider_bridge_ort.cc
@@ -41,6 +41,7 @@
 #include "core/session/onnxruntime_c_api.h"
 #include "core/common/string_helper.h"
+#include <utility>

 #ifdef ENABLE_TRAINING
 #ifdef ENABLE_TRAINING_TORCH_INTEROP
@@ -706,6 +707,12 @@ struct ProviderHostImpl : ProviderHost {
     return p->GetConfigEntry(config_key);
   }

+  // ConfigOptions (wrapped)
+  std::string ConfigOptions__GetConfigOrDefault(const ConfigOptions* p, const std::string& config_key,
+                                                const std::string& default_value) override {
+    return p->GetConfigOrDefault(config_key, default_value);
+  }
+
   // OrtRunOptions (wrapped)
   const ConfigOptions& RunOptions__GetConfigOptions(const RunOptions* p) override { return p->config_options; }
@@ -1783,12 +1790,6 @@ ProviderOptions OrtOpenVINOProviderOptionsToOrtOpenVINOProviderOptionsV2(const OrtOpenVINOProviderOptions* legacy_ov_options) {
   if (legacy_ov_options->device_type != nullptr)
     ov_options_converted_map["device_type"] = legacy_ov_options->device_type;

-  if (legacy_ov_options->enable_npu_fast_compile) {
-    ov_options_converted_map["enable_npu_fast_compile"] = "false";
-  } else {
-    ov_options_converted_map["enable_npu_fast_compile"] = "true";
-  }
-
   if (legacy_ov_options->num_of_threads != '\0')
     ov_options_converted_map["num_of_threads"] = std::to_string(legacy_ov_options->num_of_threads);
@@ -1809,51 +1810,24 @@ ProviderOptions OrtOpenVINOProviderOptionsToOrtOpenVINOProviderOptionsV2(const OrtOpenVINOProviderOptions* legacy_ov_options) {
     ov_options_converted_map["disable_dynamic_shapes"] = "true";
   }

+  if (legacy_ov_options->enable_npu_fast_compile) {
+    LOGS_DEFAULT(WARNING) << "enable_npu_fast_compile option is deprecated. Skipping this option";
+  }
+
   // Add new provider option below
   ov_options_converted_map["num_streams"] = "1";
-  ov_options_converted_map["export_ep_ctx_blob"] = "false";
+  ov_options_converted_map["load_config"] = "";
   ov_options_converted_map["model_priority"] = "DEFAULT";
   ov_options_converted_map["enable_qdq_optimizer"] = "false";
   return ov_options_converted_map;
 }

-std::shared_ptr<IExecutionProviderFactory> OpenVINOProviderFactoryCreator::Create(const OrtOpenVINOProviderOptions* provider_options) {
-  ProviderOptions ov_options_converted_map = onnxruntime::OrtOpenVINOProviderOptionsToOrtOpenVINOProviderOptionsV2(provider_options);
-  return s_library_openvino.Get().CreateExecutionProviderFactory(&ov_options_converted_map);
-}
-
-void ORTSessionOptionsToOrtOpenVINOProviderOptions(ProviderOptions& ov_options,
-                                                   const SessionOptions* session_options) {
-  bool disable_cpu_fallback = session_options->config_options.GetConfigOrDefault(
-                                  kOrtSessionOptionsDisableCPUEPFallback, "0") == "1";
-  if (disable_cpu_fallback)
-    ov_options["disable_cpu_fallback"] = "true";
-
-  // values from session options will override the providerOptions Value
-  bool so_epctx_enable = session_options->config_options.GetConfigOrDefault(
-                             kOrtSessionOptionEpContextEnable, "0") == "1";
-  if (so_epctx_enable)
-    ov_options["so_export_ep_ctx_blob"] = "true";
-
-  std::string so_cache_path = session_options->config_options.GetConfigOrDefault(kOrtSessionOptionEpContextFilePath, "").c_str();
-  ov_options["so_epctx_path"] = so_cache_path;
-
-  // Default embedMode is 1. Saving the compiled model contents as a Epctx node attribute
-  bool so_epctx_embed_mode = session_options->config_options.GetConfigOrDefault(
-                                 kOrtSessionOptionEpContextEmbedMode, "1") == "0";
-  if (so_epctx_embed_mode) {
-    // defaults to true
-    ov_options["so_epctx_embed_mode"] = "false";
-  }
-}
-
-std::shared_ptr<IExecutionProviderFactory> OpenVINOProviderFactoryCreator::Create(ProviderOptions* provider_options_map,
-                                                                                  const SessionOptions* session_options) {
+std::shared_ptr<IExecutionProviderFactory> OpenVINOProviderFactoryCreator::Create(
+    const ProviderOptions* provider_options_map, const SessionOptions* session_options) {
   // Append session options applicable for EP to EP Provider options.
-  if (session_options) {
-    onnxruntime::ORTSessionOptionsToOrtOpenVINOProviderOptions(*provider_options_map, session_options);
-  }
-  return s_library_openvino.Get().CreateExecutionProviderFactory(provider_options_map);
+  std::pair<const ProviderOptions*, const ConfigOptions&> config_buffer = {provider_options_map,
+                                                                           session_options->config_options};
+  const void* obj = reinterpret_cast<const void*>(&config_buffer);
+  return s_library_openvino.Get().CreateExecutionProviderFactory(obj);
 }

 std::shared_ptr<IExecutionProviderFactory> DnnlProviderFactoryCreator::Create(const OrtDnnlProviderOptions* dnnl_options) {
@@ -2106,9 +2080,11 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_MIGraphX, _In_ OrtSessionOptions* options,
   API_IMPL_END
 }

-ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_OpenVINO, _In_ OrtSessionOptions* options, _In_ const OrtOpenVINOProviderOptions* provider_options) {
+ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_OpenVINO, _In_ OrtSessionOptions* options,
+                    _In_ const OrtOpenVINOProviderOptions* provider_options) {
   API_IMPL_BEGIN
-  auto factory = onnxruntime::OpenVINOProviderFactoryCreator::Create(provider_options);
+  const onnxruntime::ProviderOptions ov_options_converted_map = onnxruntime::OrtOpenVINOProviderOptionsToOrtOpenVINOProviderOptionsV2(provider_options);
+  auto factory = onnxruntime::OpenVINOProviderFactoryCreator::Create(&ov_options_converted_map, &(options->value));
   if (!factory) {
     return OrtApis::CreateStatus(ORT_FAIL, "SessionOptionsAppendExecutionProvider_OpenVINO: Failed to load shared library");
   }

diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc
index 3062738eefcf2..63757a6120fa3 100644
--- a/onnxruntime/python/onnxruntime_pybind_state.cc
+++ b/onnxruntime/python/onnxruntime_pybind_state.cc
@@ -1062,12 +1062,6 @@ std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance(
       } else if (option.first == "precision") {
         OV_provider_options_map[option.first] = option.second;
         continue;
-      } else if (option.first == "enable_npu_fast_compile") {
-        if (!(option.second == "True" || option.second == "true" ||
-              option.second == "False" || option.second == "false")) {
-          ORT_THROW("Invalid value passed for enable_npu_fast_compile: ", option.second);
-        }
-        OV_provider_options_map[option.first] = option.second;
       } else if (option.first == "enable_opencl_throttling") {
         if (!(option.second == "True" || option.second == "true" ||
               option.second == "False" || option.second == "false")) {
@@ -1103,13 +1097,13 @@ std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance(
       } else if (option.first == "num_streams") {
         OV_provider_options_map[option.first] = option.second;
         continue;
-      } else if (option.first == "cache_dir") {
+      } else if (option.first == "load_config") {
         OV_provider_options_map[option.first] = option.second;
         continue;
-      } else if (option.first == "context") {
+      } else if (option.first == "cache_dir") {
         OV_provider_options_map[option.first] = option.second;
         continue;
-      } else if (option.first == "export_ep_ctx_blob") {
+      } else if (option.first == "context") {
         OV_provider_options_map[option.first] = option.second;
         continue;
       } else if (option.first == "enable_qdq_optimizer") {

diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc
index 42b73ec384cf5..9e1098b24f611 100644
--- a/onnxruntime/test/perftest/command_args_parser.cc
+++ b/onnxruntime/test/perftest/command_args_parser.cc
@@ -76,11 +76,10 @@ namespace perftest {
       "\n"
       "\t [OpenVINO only] [device_type]: Overrides the accelerator hardware type and precision with these values at runtime.\n"
       "\t [OpenVINO only] [device_id]: Selects a particular hardware device for inference.\n"
-      "\t [OpenVINO only] [enable_npu_fast_compile]: Optionally enabled to speeds up the model's compilation on NPU device targets.\n"
       "\t [OpenVINO only] [num_of_threads]: Overrides the accelerator hardware type and precision with these values at runtime.\n"
       "\t [OpenVINO only] [cache_dir]: Explicitly specify the path to dump and load the blobs(Model caching) or cl_cache (Kernel Caching) files feature. If blob files are already present, it will be directly loaded.\n"
       "\t [OpenVINO only] [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU device(Reduces the CPU Utilization while using GPU) \n"
-      "\t [Example] [For OpenVINO EP] -e openvino -i \"device_type|CPU enable_npu_fast_compile|true num_of_threads|5 enable_opencl_throttling|true cache_dir|\"\"\"\n"
+      "\t [Example] [For OpenVINO EP] -e openvino -i \"device_type|CPU num_of_threads|5 enable_opencl_throttling|true cache_dir|\"\"\"\n"
       "\n"
       "\t [QNN only] [backend_path]: QNN backend path. e.g '/folderpath/libQnnHtp.so', '/folderpath/libQnnCpu.so'.\n"
       "\t [QNN only] [profiling_level]: QNN profiling level, options: 'basic', 'detailed', default 'off'.\n"

diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc
index eb230ac771e13..a369c36ae9c43 100644
--- a/onnxruntime/test/perftest/ort_test_session.cc
+++ b/onnxruntime/test/perftest/ort_test_session.cc
@@ -18,6 +18,10 @@
 #include "providers.h"
 #include "TestCase.h"

+#ifdef USE_OPENVINO
+#include "nlohmann/json.hpp"
+#endif
+
 #ifdef USE_DML
 #include "core/providers/dml/dml_provider_factory.h"
 #include "core/providers/dml/dml_session_options_config_keys.h"
@@ -39,13 +43,8 @@ std::chrono::duration<double> OnnxRuntimeTestSession::Run() {
   auto& input = test_inputs_.at(id);
   auto start = std::chrono::high_resolution_clock::now();

-  if (!use_device_mem) {
-    auto output_values = session_.Run(Ort::RunOptions{nullptr}, input_names_.data(), input.data(), input_names_.size(),
-                                      output_names_raw_ptr.data(), output_names_raw_ptr.size());
-  } else {
-    session_.Run(Ort::RunOptions{nullptr}, input_names_.data(), input.data(), input_names_.size(),
-                 output_names_raw_ptr.data(), outputs_.data(), output_names_raw_ptr.size());
-  }
+  session_.Run(Ort::RunOptions{nullptr}, input_names_.data(), input.data(), input_names_.size(),
+               output_names_raw_ptr.data(), outputs_.data(), output_names_raw_ptr.size());
   auto end = std::chrono::high_resolution_clock::now();
   std::chrono::duration<double> duration_seconds = end - start;
@@ -807,13 +806,6 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)");
          ORT_THROW("[ERROR] [OpenVINO] Unsupported inference precision is selected. CPU only supports FP32 . \n");
        }
      }
-    } else if (key == "enable_npu_fast_compile") {
-      if (value == "true" || value == "True" ||
-          value == "false" || value == "False") {
-        ov_options[key] = value;
-      } else {
-        ORT_THROW("[ERROR] [OpenVINO] The value for the key 'enable_npu_fast_compile' should be a boolean i.e. true or false. Default value is false.\n");
-      }
     } else if (key == "enable_opencl_throttling") {
       if (value == "true" || value == "True" ||
           value == "false" || value == "False") {
@@ -843,6 +835,28 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)");
       } else {
         ov_options[key] = value;
       }
+    } else if (key == "load_config") {
+      auto load_json = [&](std::string filename) -> std::string {
+        std::ifstream input_filestream(filename);
+        if (!input_filestream.is_open()) {
+          ORT_THROW("Passed an invalid JSON config file path \"" + filename + "\".");
+        }
+        nlohmann::json json_config;
+        try {
+          input_filestream >> json_config;
+        } catch (const OnnxRuntimeException& ex) {
+          ORT_THROW("Exception parsing config file \"" + filename + "\".\n" + ex.what());
+        } catch (const std::exception& ex) {
+          throw std::runtime_error("Standard exception for config file \"" + filename + "\".\n" + ex.what());
+        } catch (...) {
+          throw std::runtime_error("Unknown exception for config file \"" + filename + "\".\n");
+        }
+        if (json_config.empty()) {
+          ORT_THROW("Empty JSON content passed \"" + filename + "\".");
+        }
+        return json_config.dump();
+      };
+      ov_options[key] = load_json(value);
     } else if (key == "model_priority") {
       ov_options[key] = value;
     } else if (key == "cache_dir") {
@@ -855,21 +869,13 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)");
       } else {
         ov_options[key] = value;
       }
-    } else if (key == "export_ep_ctx_blob") {
-      if (value == "true" || value == "True" ||
-          value == "false" || value == "False") {
-        ov_options[key] = value;
-      } else {
-        ORT_THROW(
-            "[ERROR] [OpenVINO] The value for the key 'export_ep_ctx_blob' "
-            "should be a boolean i.e. true or false. Default value is false.\n");
-      }
-    } else if (key == "use_device_mem") {
-      if (value == "true" || value == "True") {
-        use_device_mem = true;
-      }
+    } else if (key == "device_memory_name") {
+      device_memory_name_ = std::move(value);
     } else {
-      ORT_THROW("[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO. ['device_type', 'device_id', 'enable_npu_fast_compile', 'num_of_threads', 'cache_dir', 'num_streams', 'enable_opencl_throttling', 'disable_dynamic_shapes'] \n");
+      ORT_THROW(
+          "[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO."
+          " ['device_type', 'device_id', 'num_of_threads', 'load_config', 'cache_dir', 'num_streams', "
+          "'enable_opencl_throttling', 'disable_dynamic_shapes', 'enable_qdq_optimizer', 'model_priority'] \n");
     }
   }
   session_options.AppendExecutionProvider_OpenVINO_V2(ov_options);
@@ -912,25 +918,31 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)");
     input_names_[i] = input_names_str_[i].c_str();
   }

-  if (use_device_mem) {
-    Ort::MemoryInfo memory_info = Ort::MemoryInfo("OpenVINO_RT_NPU", OrtArenaAllocator, 0, OrtMemTypeCPUOutput);
+  auto transform_fcn = std::function<int64_t(int64_t)>();
+  auto new_value = std::function<Ort::Value(OrtAllocator*, const std::vector<int64_t>&, Ort::ConstTensorTypeAndShapeInfo&)>();
+  if (device_memory_name_.empty()) {
+    transform_fcn = [](int64_t input) { return input; };
+    new_value = [](OrtAllocator*, const std::vector<int64_t>&, Ort::ConstTensorTypeAndShapeInfo&) {
+      return Ort::Value(nullptr);
+    };
+  } else {
+    Ort::MemoryInfo memory_info = Ort::MemoryInfo(device_memory_name_.data(), OrtArenaAllocator, 0, OrtMemTypeCPUOutput);
     custom_allocator_ = std::make_unique<Ort::Allocator>(session_, memory_info);
-    for (size_t i = 0; i < output_names_raw_ptr.size(); i++) {
-      Ort::TypeInfo type_info = session_.GetOutputTypeInfo(i);
-      auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
-
-      std::vector<int64_t> output_shape = tensor_info.GetShape();
+    allocator_ = *custom_allocator_;

-      // free dimensions are treated as 1 if not overridden
-      for (int64_t& dim : output_shape) {
-        if (dim == -1) {
-          dim = 1;
-        }
-      }
+    // free dimensions are treated as 1 if not overridden
+    transform_fcn = [](int64_t input) { return (input == -1) ? -input : input; };
+    new_value = [](OrtAllocator* allocator, const std::vector<int64_t>& output_shape, Ort::ConstTensorTypeAndShapeInfo& tensor_info) {
+      return Ort::Value::CreateTensor(allocator, output_shape.data(), output_shape.size(), tensor_info.GetElementType());
+    };
+  }

-      outputs_.push_back(Ort::Value::CreateTensor(*custom_allocator_, (const int64_t*)output_shape.data(),
-                                                  output_shape.size(), tensor_info.GetElementType()));
-    }
+  for (size_t i = 0; i < output_names_raw_ptr.size(); i++) {
+    Ort::TypeInfo type_info = session_.GetOutputTypeInfo(i);
+    auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
+    std::vector<int64_t> output_shape = tensor_info.GetShape();
+    std::transform(output_shape.begin(), output_shape.end(), output_shape.begin(), transform_fcn);
+    outputs_.emplace_back(new_value(allocator_, output_shape, tensor_info));
   }
 }
@@ -1020,29 +1032,16 @@ bool OnnxRuntimeTestSession::PopulateGeneratedInputTestData(int32_t seed) {
     Ort::TypeInfo type_info = session_.GetInputTypeInfo(i);
     if (type_info.GetONNXType() == ONNX_TYPE_TENSOR) {
       auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
-      if (!use_device_mem) {
-        Ort::MemoryInfo memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
-      }
       std::vector<int64_t> input_node_dim = tensor_info.GetShape();

       // free dimensions are treated as 1 if not overridden
-      for (int64_t& dim : input_node_dim) {
-        if (dim == -1) {
-          dim = 1;
-        }
-      }
-      if (use_device_mem) {
-        Ort::Value input_tensor = Ort::Value::CreateTensor(*custom_allocator_, (const int64_t*)input_node_dim.data(),
-                                                           input_node_dim.size(), tensor_info.GetElementType());
-        InitializeTensorWithSeed(seed, input_tensor);
-        PreLoadTestData(0, i, std::move(input_tensor));
-      } else {
-        auto allocator = Ort::AllocatorWithDefaultOptions();
-        Ort::Value input_tensor = Ort::Value::CreateTensor(allocator, (const int64_t*)input_node_dim.data(),
-                                                           input_node_dim.size(), tensor_info.GetElementType());
-        InitializeTensorWithSeed(seed, input_tensor);
-        PreLoadTestData(0, i, std::move(input_tensor));
-      }
+      auto transform_fcn = [](int64_t input) { return (input == -1) ? -input : input; };
+      std::transform(input_node_dim.begin(), input_node_dim.end(), input_node_dim.begin(), transform_fcn);
+
+      Ort::Value input_tensor = Ort::Value::CreateTensor(allocator_, (const int64_t*)input_node_dim.data(),
+                                                         input_node_dim.size(), tensor_info.GetElementType());
+      InitializeTensorWithSeed(seed, input_tensor);
+      PreLoadTestData(0, i, std::move(input_tensor));
     }
   }
   return true;
 }

diff --git a/onnxruntime/test/perftest/ort_test_session.h b/onnxruntime/test/perftest/ort_test_session.h
index e33041a2a0958..7d5e46983ad41 100644
--- a/onnxruntime/test/perftest/ort_test_session.h
+++ b/onnxruntime/test/perftest/ort_test_session.h
@@ -38,6 +38,7 @@ class OnnxRuntimeTestSession : public TestSession {
   std::mt19937 rand_engine_;
   std::uniform_int_distribution<int> dist_;
   std::vector<std::vector<Ort::Value>> test_inputs_;
+  OrtAllocator* allocator_ = Ort::AllocatorWithDefaultOptions();
   std::unique_ptr<Ort::Allocator> custom_allocator_;
   std::vector<Ort::Value> outputs_;
   std::vector<std::string> output_names_;
@@ -48,7 +49,7 @@ class OnnxRuntimeTestSession : public TestSession {
   std::vector<std::string> input_names_str_;
   const int input_length_;
   std::string provider_name_;
-  bool use_device_mem = false;
+  std::string device_memory_name_;  // Device memory type name to use from the list in allocator.h
 };

 }  // namespace perftest

diff --git a/onnxruntime/test/providers/cpu/model_tests.cc b/onnxruntime/test/providers/cpu/model_tests.cc
index 177647ab5be6b..e3c86a137484f 100644
--- a/onnxruntime/test/providers/cpu/model_tests.cc
+++ b/onnxruntime/test/providers/cpu/model_tests.cc
@@ -570,7 +570,7 @@ ::std::vector<::std::basic_string<ORTCHAR_T>> GetParameterStrings() {
       ORT_TSTR("yolov3"),
       ORT_TSTR("LSTM_Seq_lens_unpacked"),
       ORT_TSTR("tinyyolov3"),
-      ORT_TSTR("faster_rcnn"),
+      // ORT_TSTR("faster_rcnn"),
       ORT_TSTR("mask_rcnn"),
       ORT_TSTR("coreml_FNS-Candy_ImageNet"),
       ORT_TSTR("tf_mobilenet_v2_1.0_224"),
@@ -581,7 +581,7 @@ ::std::vector<::std::basic_string<ORTCHAR_T>> GetParameterStrings() {
       ORT_TSTR("mlperf_ssd_resnet34_1200"),
       ORT_TSTR("candy"),
       ORT_TSTR("cntk_simple_seg"),
-      ORT_TSTR("GPT2_LM_HEAD"),
+      // ORT_TSTR("GPT2_LM_HEAD"),
       ORT_TSTR("mlperf_ssd_mobilenet_300"),
       ORT_TSTR("fp16_coreml_FNS-Candy"),
       ORT_TSTR("fp16_test_tiny_yolov2"),

diff --git a/onnxruntime/test/util/default_providers.cc b/onnxruntime/test/util/default_providers.cc
index d07e01c1a4e27..d57a22f024d5f 100644
--- a/onnxruntime/test/util/default_providers.cc
+++ b/onnxruntime/test/util/default_providers.cc
@@ -99,11 +99,13 @@ std::unique_ptr<IExecutionProvider> MIGraphXExecutionProviderWithOptions(const OrtMIGraphXProviderOptions* params) {
   return nullptr;
 }

-std::unique_ptr<IExecutionProvider> OpenVINOExecutionProviderWithOptions(const OrtOpenVINOProviderOptions* params) {
+std::unique_ptr<IExecutionProvider> OpenVINOExecutionProviderWithOptions(const ProviderOptions* params,
+                                                                         const SessionOptions* session_options) {
 #ifdef USE_OPENVINO
-  return OpenVINOProviderFactoryCreator::Create(params)->CreateProvider();
+  return OpenVINOProviderFactoryCreator::Create(params, session_options)->CreateProvider();
 #else
   ORT_UNUSED_PARAMETER(params);
+  ORT_UNUSED_PARAMETER(session_options);
   return nullptr;
 #endif
 }

diff --git a/onnxruntime/test/util/include/default_providers.h b/onnxruntime/test/util/include/default_providers.h
index 1fd9894e09d4e..ed95bf67f1ffb 100644
--- a/onnxruntime/test/util/include/default_providers.h
+++ b/onnxruntime/test/util/include/default_providers.h
@@ -49,7 +49,7 @@ std::unique_ptr<IExecutionProvider> TensorrtExecutionProviderWithOptions(const OrtTensorRTProviderOptions* params);
 std::unique_ptr<IExecutionProvider> TensorrtExecutionProviderWithOptions(const OrtTensorRTProviderOptionsV2* params);
 std::unique_ptr<IExecutionProvider> DefaultMIGraphXExecutionProvider();
 std::unique_ptr<IExecutionProvider> MIGraphXExecutionProviderWithOptions(const OrtMIGraphXProviderOptions* params);
-std::unique_ptr<IExecutionProvider> OpenVINOExecutionProviderWithOptions(const OrtOpenVINOProviderOptions* params);
+std::unique_ptr<IExecutionProvider> OpenVINOExecutionProviderWithOptions(const ProviderOptions* params, const SessionOptions* session_options = nullptr);
 std::unique_ptr<IExecutionProvider> DefaultOpenVINOExecutionProvider();
 std::unique_ptr<IExecutionProvider> DefaultNnapiExecutionProvider();
 std::unique_ptr<IExecutionProvider> DefaultVSINPUExecutionProvider();
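
Usage sketch (illustrative, not part of the patch): the new `load_config` provider option is the replacement path for the removed `enable_npu_fast_compile` and `so_*` options when custom OpenVINO properties are needed. A minimal C++ sketch under stated assumptions is below; the property names and values ("PERFORMANCE_HINT", "NUM_STREAMS") and the "model.onnx" path are placeholders, not mandated by this change. Per the parser above, top-level JSON keys must be "CPU", "GPU", or "NPU", each value must be an object of property-to-value pairs, and properties that are unsupported or immutable on the target device are skipped with a warning rather than failing the session.

    // Sketch: driving the OpenVINO EP's new load_config option through the
    // public C++ API. Assumes an ONNX Runtime build with the OpenVINO EP;
    // property names/values and the model path are illustrative assumptions.
    #include <string>
    #include <unordered_map>
    #include "onnxruntime_cxx_api.h"

    int main() {
      Ort::Env env;
      Ort::SessionOptions session_options;

      std::unordered_map<std::string, std::string> ov_options;
      // With an AUTO/HETERO/MULTI prefix, properties are applied to the
      // individual devices parsed from the list after ':' (here GPU and CPU).
      ov_options["device_type"] = "AUTO:GPU,CPU";
      ov_options["load_config"] = R"({
        "GPU": {"PERFORMANCE_HINT": "THROUGHPUT"},
        "CPU": {"NUM_STREAMS": 2}
      })";

      // Same entry point onnxruntime_perf_test uses in this patch.
      session_options.AppendExecutionProvider_OpenVINO_V2(ov_options);
      Ort::Session session(env, ORT_TSTR("model.onnx"), session_options);
      return 0;
    }

In onnxruntime_perf_test the same option takes a path to a JSON file rather than an inline string, e.g. `-e openvino -i "device_type|GPU load_config|config.json"`, which the load_json helper above reads and forwards as a serialized JSON string.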