Npu allocator #437
File: onnxruntime/core/providers/openvino/backends/basic_backend.cc
@@ -48,14 +48,6 @@
  // Set the inference_num_threads property of the CPU
  SetNumThreads(device_config);

#ifndef NDEBUG
  if (IsDebugEnabled()) {
    std::string file_name = subgraph_context.subgraph_name + "_static.onnx";
    std::fstream outfile(file_name, std::ios::out | std::ios::trunc | std::ios::binary);
    model_proto.SerializeToOstream(outfile);
  }
#endif

  try {
    std::string dev_prec = global_context.device_type + "_" + global_context_.precision_str;
@@ -295,16 +287,99 @@
      ORT_THROW(msg);
    }
  } else {
    OVTensorPtr graph_input_blob;
    auto tensor = context.GetInput(subgraph_context_.input_names.at(input_name));
    auto allocator_name = tensor.GetTensorMemoryInfo().GetAllocatorName();
    ov_tensor_data_t ov_tensor_key;
    ort_tensor_key_t ort_tensor_key{tensor.GetTensorRawData(), allocator_name};
    if (const auto& it = ort_ov_tensor_map.find(ort_tensor_key); it != ort_ov_tensor_map.end()) {
      ov_tensor_key = it->second;
    } else {
      // Does this make sense for both types of allocators?
      auto input = ie_cnn_network_->get_parameters().at(input_idx);
      ov_tensor_key.tensor_ptr = std::make_shared<ov::Tensor>(input->get_element_type(), input->get_shape(),
                                                              (void*)tensor.GetTensorRawData());
      if (allocator_name == OpenVINO_RT_NPU) {
        ov_tensor_key.copy_needed = false;
      } else {
        ov_tensor_key.copy_needed = true;
      }
      ort_ov_tensor_map.emplace(ort_tensor_key, ov_tensor_key);

      try {
        infer_request->SetTensor(input_name, ov_tensor_key.tensor_ptr);
      } catch (const char* msg) {
        ORT_THROW(msg);
      }
    }

    if (ov_tensor_key.copy_needed) {
      const char* ort_tensor_data = tensor.GetTensorData<char>();
      size_t tensor_data_size = ov_tensor_key.tensor_ptr->get_byte_size();
      auto ort_batch_memory_offset = ort_tensor_data + tensor_data_size * batch_slice_idx;
      std::memcpy(ov_tensor_key.tensor_ptr->data(), ort_batch_memory_offset, tensor_data_size);
    }
  }
  input_idx++;
}
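Note: ov_tensor_data_t and ort_tensor_key_t are used above but defined elsewhere in the provider. A minimal sketch of what the cache types could look like, assuming the key is simply the raw ORT buffer pointer paired with the allocator name (the real definitions in the PR may differ):

// Hypothetical sketch, not code from this PR.
#include <map>
#include <memory>
#include <string>
#include <utility>
#include "openvino/runtime/tensor.hpp"

struct ov_tensor_data_t {
  std::shared_ptr<ov::Tensor> tensor_ptr;  // ov::Tensor wrapping the ORT-owned buffer
  bool copy_needed = true;                 // false when the buffer is already device-visible (NPU host memory)
};
using ort_tensor_key_t = std::pair<const void*, std::string>;  // {GetTensorRawData(), allocator name}
using ort_ov_tensor_map_t = std::map<ort_tensor_key_t, ov_tensor_data_t>;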
// Set the output blob as remote blob
auto graph_output_info = exe_network_.Get().outputs();
auto output_idx = 0;
for (auto output_info_iter = graph_output_info.begin();
     output_info_iter != graph_output_info.end(); ++output_info_iter) {
  auto output_names = output_info_iter->get_names();
  std::string onnx_output_name;
  std::string output_name;
  bool output_name_found = false;
  // using the output name retrieved from ONNX original to match with the output names returned by OV tensors
  for (auto it = subgraph_context_.output_names.begin(); it != subgraph_context_.output_names.end(); ++it) {
    onnx_output_name = it->first;
    if (output_names.find(onnx_output_name) != output_names.end()) {
      // Assigning the output_name
      output_name = it->first;
      output_name_found = true;
      break;
    }
  }
  if (!output_name_found) {
    ORT_THROW(
        log_tag +
        "Output names mismatch between OpenVINO and ONNX. [ONNX Output: ] " +
        onnx_output_name + " doesn't exist in the list of OpenVINO output tensor names");
  }

  size_t batch_size = 1;
  Ort::UnownedValue tensor = GetOutputTensor(context,
                                             batch_size,
                                             infer_request,
                                             output_name,
                                             subgraph_context_.output_names);
  auto allocator_name = tensor.GetTensorMemoryInfo().GetAllocatorName();

  ov_tensor_data_t ov_tensor_data;
  ort_tensor_key_t ort_tensor_key{tensor.GetTensorRawData(), allocator_name};
  if (const auto& it = ort_ov_tensor_map.find(ort_tensor_key); it != ort_ov_tensor_map.end()) {
    ov_tensor_data = it->second;
  } else {
    auto output = ie_cnn_network_->get_results().at(output_idx);
    ov_tensor_data.tensor_ptr = std::make_shared<ov::Tensor>(output->get_element_type(), output->get_shape(),
                                                             (void*)tensor.GetTensorRawData());
    if (allocator_name == OpenVINO_RT_NPU) {
      ov_tensor_data.copy_needed = false;
    } else {
      ov_tensor_data.copy_needed = true;
    }
    ort_ov_tensor_map.emplace(ort_tensor_key, ov_tensor_data);

    try {
      graph_input_blob = infer_request->GetTensor(input_name);
      infer_request->SetTensor(output_name, ov_tensor_data.tensor_ptr);
    } catch (const char* msg) {
      ORT_THROW(msg);
    }
    FillInputBlob(std::move(graph_input_blob), batch_slice_idx, std::move(input_name), context, subgraph_context_);
  }
  input_idx++;
  output_idx++;
}
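The input and output paths above share the same find-or-create pattern: look the ORT buffer up in ort_ov_tensor_map, otherwise wrap it in a new ov::Tensor, decide whether a copy is needed, and bind it to the infer request. A condensed sketch of that pattern (helper name and signature are illustrative, using the ov_tensor_data_t/ort_tensor_key_t shapes sketched earlier; OpenVINO_RT_NPU is assumed to be the allocator-name string):

// Hypothetical helper, not code from this PR.
static ov_tensor_data_t BindOrReuse(ort_ov_tensor_map_t& cache, const ort_tensor_key_t& key,
                                    const ov::element::Type& type, const ov::Shape& shape) {
  if (auto it = cache.find(key); it != cache.end()) {
    return it->second;  // same ORT buffer seen before: reuse the cached ov::Tensor
  }
  ov_tensor_data_t entry;
  // Wrap the ORT buffer directly; OpenVINO allocates nothing for this tensor.
  entry.tensor_ptr = std::make_shared<ov::Tensor>(type, shape, const_cast<void*>(key.first));
  // Zero-copy is only safe when the buffer came from the NPU level-zero host allocator.
  entry.copy_needed = (key.second != "OpenVINO_RT_NPU");
  cache.emplace(key, entry);
  return entry;
}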
  // Start Async inference
  infer_request->StartAsync();
} catch (const char* msg) {
@@ -430,7 +505,6 @@
auto graph_output_info = exe_network_.Get().outputs();
for (auto output_info_iter = graph_output_info.begin();
     output_info_iter != graph_output_info.end(); ++output_info_iter) {
  OVTensorPtr graph_output_blob;
  auto output_names = output_info_iter->get_names();
  std::string onnx_output_name;
  std::string output_name;
@@ -454,20 +528,24 @@
        " doesn't exist in the "
        "list of OpenVINO output tensor names");
  }
  try {
    graph_output_blob = infer_request->GetTensor(output_name);
  } catch (const char* msg) {
    ORT_THROW(msg);
  }

  size_t batch_size = 1;
  Ort::UnownedValue output_tensor =
      GetOutputTensor(context, batch_size, infer_request, std::move(output_name), subgraph_context_.output_names);
  auto mem_info = output_tensor.GetTensorMemoryInfo();
  if (mem_info.GetAllocatorName() == OpenVINO_GPU) {

Review comment: Check if this has an effect on the OpenVINO_GPU IOBuffer path.

    return;
  auto allocator_name = output_tensor.GetTensorMemoryInfo().GetAllocatorName();
  ov_tensor_data_t ov_tensor_data;

Review comment: Check if the declaration in StartAsyncInference is redundant.
Reply: We will require the ov_tensor_data for creating the input/output tensor before the inference in StartAsyncInference.

  ort_tensor_key_t ort_tensor_key{output_tensor.GetTensorRawData(), allocator_name};
  if (const auto& it = ort_ov_tensor_map.find(ort_tensor_key); it != ort_ov_tensor_map.end()) {
    ov_tensor_data = it->second;
  } else {
    size_t batch_slice = 0;
    FillOutputBlob(std::move(graph_output_blob), output_tensor, batch_slice);
    ORT_THROW(log_tag + "Expected all outputs to have associated OV::Tensor's");
  }
  if (ov_tensor_data.copy_needed) {
    auto ort_tensor_data = output_tensor.GetTensorMutableData<char>();
    size_t tensor_data_size = ov_tensor_data.tensor_ptr->get_byte_size();
    auto ort_batch_memory_offset = ort_tensor_data /*+ tensor_data_size * batch_size*/;
    std::memcpy(ort_batch_memory_offset, ov_tensor_data.tensor_ptr->data(), tensor_data_size);
  }
}
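The copy-back above only runs for outputs whose ORT buffer is not already the backing store of the bound ov::Tensor. A minimal standalone sketch of that same logic (the helper name is illustrative, reusing the ov_tensor_data_t shape sketched earlier):

// Hypothetical helper, not code from this PR.
#include <cstring>

static void CopyOutputIfNeeded(const ov_tensor_data_t& cached, Ort::UnownedValue& output_tensor) {
  if (!cached.copy_needed) {
    return;  // NPU level-zero host buffer: OpenVINO already wrote into the ORT output
  }
  std::memcpy(output_tensor.GetTensorMutableData<char>(),  // ORT-owned destination buffer
              cached.tensor_ptr->data(),                   // OV tensor filled by the inference request
              cached.tensor_ptr->get_byte_size());
}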
File: onnxruntime/core/providers/openvino/ov_allocator.cc
@@ -0,0 +1,54 @@
// Copyright (C) Intel Corporation
// Licensed under the MIT License

#include "core/providers/openvino/ov_allocator.h"
#include "core/providers/openvino/ov_interface.h"
#include "openvino/runtime/intel_npu/level_zero/level_zero.hpp"
#include "openvino/runtime/intel_npu/properties.hpp"

namespace onnxruntime {

using namespace openvino_ep;

constexpr size_t default_alignment = 4096;

static inline size_t align_up(size_t size, size_t pow2_alignment) {
  return (size + pow2_alignment - 1) & ~(pow2_alignment - 1);
}
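The helper rounds a size up to the next multiple of a power-of-two alignment; a quick illustrative check of the arithmetic (not part of the PR, values chosen for the 4096-byte default above):

// Only valid when the alignment is a power of two, which holds for default_alignment.
static_assert(((4097u + 4096u - 1u) & ~(4096u - 1u)) == 8192u, "align_up(4097, 4096) rounds to the next 4 KiB boundary");
static_assert(((4096u + 4096u - 1u) & ~(4096u - 1u)) == 4096u, "already-aligned sizes are unchanged");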
OVRTAllocator::OVRTAllocator(ov::Core& core, OrtDevice::DeviceType device_type,
                             OrtDevice::DeviceId device_id, const char* name)
    : IAllocator(OrtMemoryInfo(name, OrtAllocatorType::OrtDeviceAllocator,
                               OrtDevice(device_type, OrtDevice::MemType::DEFAULT, device_id),
                               device_id, OrtMemTypeCPUInput)),
      core_(core) {
  if (device_type == OrtDevice::NPU) {
    remote_ctx_ = core_.get_default_context("NPU").as<ov::intel_npu::level_zero::ZeroContext>();
  } else {
    ORT_THROW("Invalid device type");
  }
}
void* OVRTAllocator::Alloc(size_t size) {
  try {
    size_t alloc_size = align_up(size + sizeof(ov::Tensor*) + default_alignment, default_alignment);
    ov::Tensor* tensor = new ov::Tensor(remote_ctx_.create_host_tensor(ov::element::Type_t::u8,
                                                                       {alloc_size}));
    uintptr_t data_ptr = reinterpret_cast<uintptr_t>(tensor->data());

    ov::Tensor** ptr = reinterpret_cast<ov::Tensor**>(align_up(data_ptr + sizeof(ov::Tensor*), default_alignment));
    ptr[-1] = tensor;

    return reinterpret_cast<void*>(ptr);
  } catch (const ov::Exception& e) {
    ORT_THROW(std::string("Alloc failed: ") + e.what());
  }
  return nullptr;
}
void OVRTAllocator::Free(void* p) {
  try {
    ov::Tensor** ptr = reinterpret_cast<ov::Tensor**>(p);
    delete ptr[-1];
  } catch (const ov::Exception& e) {
    ORT_THROW(std::string("Free failed: ") + e.what());
  }
}

}  // namespace onnxruntime
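A hypothetical usage sketch of the allocator above (the device id and allocator-name string are assumptions, and running it requires an NPU with the level-zero driver): Alloc hands back an aligned pointer into a level-zero host ov::Tensor, with the owning ov::Tensor* stashed in the slot just before the returned pointer so Free can recover and delete it.

// Hypothetical usage, not code from this PR.
#include "core/providers/openvino/ov_allocator.h"
#include "openvino/runtime/core.hpp"

void npu_allocator_demo() {
  ov::Core core;
  onnxruntime::OVRTAllocator allocator(core, OrtDevice::NPU, /*device_id*/ 0, "OpenVINO_RT_NPU");
  void* p = allocator.Alloc(1024);  // 4096-aligned pointer into NPU-visible host memory
  // ... p behaves like ordinary host memory; tensors built on it can be bound zero-copy ...
  allocator.Free(p);                // deletes the ov::Tensor stashed at ((ov::Tensor**)p)[-1]
}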
File: onnxruntime/core/providers/openvino/ov_allocator.h

@@ -0,0 +1,24 @@
// Copyright (C) Intel Corporation
// Licensed under the MIT License

#pragma once

#include "core/common/inlined_containers.h"
#include "core/framework/allocator.h"
#include "openvino/runtime/remote_context.hpp"

namespace onnxruntime {

class OVRTAllocator : public IAllocator {
 public:
  OVRTAllocator(ov::Core& core, OrtDevice::DeviceType device_type, OrtDevice::DeviceId device_id, const char* name);
  void* Alloc(size_t size) override;
  void Free(void* p) override;

 private:
  ov::Core& core_;
  ov::RemoteContext remote_ctx_;
};

}  // namespace onnxruntime
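For context, a sketch of how such an allocator could be surfaced to ONNX Runtime (the wiring below is an assumption for illustration, not the PR's actual integration): the provider would hand back an OVRTAllocator instance so that buffers allocated under the "OpenVINO_RT_NPU" name are level-zero host memory and hit the zero-copy paths in basic_backend.cc above.

// Hypothetical wiring, not code from this PR.
#include <memory>
#include <vector>
#include "core/framework/allocator.h"
#include "core/providers/openvino/ov_allocator.h"
#include "openvino/runtime/core.hpp"

std::vector<onnxruntime::AllocatorPtr> CreateNpuPreferredAllocators(ov::Core& core) {
  auto npu_allocator = std::make_shared<onnxruntime::OVRTAllocator>(
      core, OrtDevice::NPU, /*device_id*/ 0, "OpenVINO_RT_NPU");
  return {npu_allocator};
}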
Review comment: OpenVINO_NPU is a redefinition of OpenVINO_RT_NPU. Remove it if it's not referenced in the code.
Reply: It is not referenced, so it has been removed in the new PR.