Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[GPU] Add support for i16, u16, and u32 element types in remote tensors #27573

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -64,11 +64,8 @@ inline cldnn::layout make_layout(const ov::element::Type type, const ov::Shape&
inline ov::element::Type convert_to_supported_device_type(ov::element::Type et) {
switch (et) {
case ov::element::f64:
case ov::element::i16:
case ov::element::u16:
return ov::element::f32;
case ov::element::u64:
case ov::element::u32:
return ov::element::i32;
default: return et;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,23 @@ ParamsKey ReorderKernelRef::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::BF16);
k.EnableInputDataType(Datatype::UINT8);
k.EnableInputDataType(Datatype::UINT16);
k.EnableInputDataType(Datatype::UINT32);
k.EnableInputDataType(Datatype::INT8);
k.EnableInputDataType(Datatype::INT16);
k.EnableInputDataType(Datatype::INT32);
k.EnableInputDataType(Datatype::INT64);
k.EnableInputDataType(Datatype::F16);
k.EnableInputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::INT16);
k.EnableOutputDataType(Datatype::INT32);
k.EnableOutputDataType(Datatype::INT64);
k.EnableOutputDataType(Datatype::UINT8);
k.EnableOutputDataType(Datatype::UINT16);
k.EnableOutputDataType(Datatype::UINT32);
k.EnableOutputDataType(Datatype::BF16);
k.EnableSurfaceInputSupport();
k.EnableDifferentTypes();
Expand Down
1 change: 1 addition & 0 deletions src/plugins/intel_gpu/src/plugin/common_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,7 @@ void convert_and_copy(const ov::ITensor* src, ov::ITensor* dst, const cldnn::str
tmp_tensor = ov::Tensor(dst_et, src->get_shape());
::convert_and_copy(src_ptr, src_et, tmp_tensor.data(), dst_et, size, cldnn::layout({}, ov::element::undefined, cldnn::format::bfyx, cldnn::padding()));
remote->copy_from(get_tensor_impl(tmp_tensor)._ptr);
return;
} else {
dst_ptr = dst->data();
}
Expand Down
3 changes: 2 additions & 1 deletion src/plugins/intel_gpu/src/plugin/ops/parameter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ static void CreateParameterOp(ProgramBuilder& p, const std::shared_ptr<ov::op::v
}

cldnn::format input_format = cldnn::format::get_default_format(input_pshape.size());
auto element_type = cldnn::element_type_to_data_type(convert_to_supported_device_type(op->get_output_element_type(0)));
auto element_type = convert_to_supported_device_type(op->get_output_element_type(0));
element_type = element_type == ov::element::boolean ? ov::element::u8 : element_type;

// look at the expected color format of this input
auto input_name = layer_type_name_ID(op);
Expand Down
3 changes: 2 additions & 1 deletion src/plugins/intel_gpu/src/plugin/ops/result.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ static void CreateResultOp(ProgramBuilder& p, const std::shared_ptr<ov::op::v0::
auto out_format = cldnn::format::get_default_format(out_rank);

auto out_primitive_name = layer_type_name_ID(op);
auto out_data_type = cldnn::element_type_to_data_type(convert_to_supported_device_type(op->get_input_element_type(0)));
auto out_data_type = convert_to_supported_device_type(op->get_input_element_type(0));
out_data_type = out_data_type == ov::element::boolean ? ov::element::u8 : out_data_type;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: can it be embedded into convert_to_supported_device_type?


auto reorder_primitive = cldnn::reorder(out_primitive_name,
inputs[0],
Expand Down
45 changes: 38 additions & 7 deletions src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,18 @@ inline bool all_host_tensors(const std::vector<ov::SoPtr<ov::ITensor>>& tensors)
});
}

// Picks the cldnn data type to use for a remote tensor holding elements of
// type `t`:
//   f64     -> f32
//   u64     -> i32
//   boolean -> u8
// Every other element type is passed through unchanged.
cldnn::data_types data_type_for_remote_tensor(ov::element::Type t) {
    // Start from the pass-through value and override only the remapped types.
    cldnn::data_types remote_type = t;
    switch (t) {
    case ov::element::Type_t::f64:
        remote_type = cldnn::data_types::f32;
        break;
    case ov::element::Type_t::u64:
        remote_type = cldnn::data_types::i32;
        break;
    case ov::element::Type_t::boolean:
        remote_type = cldnn::data_types::u8;
        break;
    default:
        break;
    }
    return remote_type;
}

} // namespace

namespace ov {
Expand Down Expand Up @@ -446,6 +458,21 @@ void SyncInferRequest::wait() {
iremote_tensor_ptr->copy_from(plugin_tensor.ptr);
}
}
} else if (!is_dynamic && is_remote_tensor_impl && output_memory) {
auto& stream = m_graph->get_network()->get_stream();
auto user_mem = remote_tensor_impl_ptr->get_original_memory();
if (user_mem->get_allocation_type() == cldnn::allocation_type::cl_mem
&& output_memory->get_allocation_type() != cldnn::allocation_type::cl_mem) {
auto plugin_tensor = m_plugin_outputs.at(port_idx);
if (is_convert_required(plugin_tensor.ptr->get_element_type(), iremote_tensor_ptr->get_element_type())) {
auto& stream = m_graph->get_network()->get_stream();
convert_and_copy(plugin_tensor.ptr.get(), iremote_tensor_ptr.get(), stream);
} else {
iremote_tensor_ptr->copy_from(plugin_tensor.ptr);
}
} else {
copy_events.push_back(output_memory->copy_to(stream, *user_mem, false));
}
} else if (is_remote_tensor_impl && is_dynamic) {
auto& stream = m_graph->get_network()->get_stream();
auto user_mem = remote_tensor_impl_ptr->get_original_memory();
Expand Down Expand Up @@ -522,7 +549,7 @@ std::shared_ptr<ov::ITensor> SyncInferRequest::create_device_tensor(const ov::Pa

return std::make_shared<RemoteTensorImpl>(m_context,
get_tensor_shape(port_shape),
cldnn::element_type_to_data_type(element_type),
::data_type_for_remote_tensor(element_type),
tensor_type);
}

Expand Down Expand Up @@ -553,7 +580,7 @@ TensorWrapper SyncInferRequest::create_or_share_device_tensor(const TensorWrappe
} else if (usm_host_raw_ptr && can_share) {
return { std::make_shared<RemoteTensorImpl>(m_context,
user_tensor->get_shape(),
cldnn::element_type_to_data_type(element_type),
::data_type_for_remote_tensor(element_type),
TensorType::BT_USM_SHARED,
user_tensor->data()), TensorOwner::USER };
}
Expand Down Expand Up @@ -785,16 +812,16 @@ std::vector<cldnn::event::ptr> SyncInferRequest::prepare_input(const std::string
if (is_remote_tensor_impl) {
if (convert_needed) {
m_plugin_inputs[input_idx] = { create_device_tensor(pshape,
cldnn::element_type_to_data_type(element_type),
::data_type_for_remote_tensor(element_type),
false), TensorOwner::PLUGIN };
} else {
m_plugin_inputs[input_idx] = user_tensor_wrapper;
}
} else if (is_usm_host_tensor && !convert_needed && can_use_usm_host(engine)) {
if (element_type != cldnn::element_type_to_data_type(element_type)) {
if (element_type != ::data_type_for_remote_tensor(element_type)) {
m_plugin_inputs[input_idx] = { std::make_shared<RemoteTensorImpl>(m_context,
user_tensor->get_shape(),
cldnn::element_type_to_data_type(element_type),
::data_type_for_remote_tensor(element_type),
TensorType::BT_USM_SHARED,
user_tensor->data()), TensorOwner::USER };
} else {
Expand Down Expand Up @@ -953,8 +980,12 @@ std::vector<cldnn::event::ptr> SyncInferRequest::prepare_output(size_t output_id
is_generic_remote ||
(m_plugin_outputs[output_idx].owner == TensorOwner::USER && !is_remote_tensor_impl);
if (update_device_tensor) {
m_plugin_outputs[output_idx] =
create_or_share_device_tensor(user_tensor_wrapper, internal_name, pshape, device_tensor_et, need_lockable_mem || convert_needed);
if (!is_remote_tensor_impl) {
m_plugin_outputs[output_idx] =
create_or_share_device_tensor(user_tensor_wrapper, internal_name, pshape, device_tensor_et, need_lockable_mem || convert_needed);
} else {
m_plugin_outputs[output_idx] = { create_device_tensor(pshape, device_tensor_et, need_lockable_mem || convert_needed), TensorOwner::PLUGIN };
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2873,3 +2873,93 @@ TEST(RemoteTensor, smoke_CanSetRoiRemoteTensor) {

compare_tensors(output_tensor_copy_0, output_tensor_copy_1);
}


// Test parameters: a single element type used for both the model input and output.
using RemoteTensorDataTypesOptionsParams = std::tuple<ov::element::Type_t>;

// Fixture that builds a small `Parameter + Constant` model in the parametrized
// element type and prepares the GPU device name and compile-time config.
class OVRemoteTensorDataType_Test : public OVRemoteTensor_Test,
                                    public testing::WithParamInterface<RemoteTensorDataTypesOptionsParams> {
protected:
    std::shared_ptr<ov::Model> fn_ptr;
    std::string deviceName;
    ov::AnyMap config;
    ov::element::Type_t element_type;

public:
    // Produces a test name of the form "OVRemoteTensorTest_<element type>".
    static std::string getTestCaseName(const testing::TestParamInfo<RemoteTensorDataTypesOptionsParams>& obj) {
        std::ostringstream name;
        name << "OVRemoteTensorTest_" << std::get<0>(obj.param);
        return name.str();
    }

    void SetUp() override {
        element_type = std::get<0>(this->GetParam());
        deviceName = ov::test::utils::DEVICE_GPU;
        config = {ov::hint::inference_precision(ov::element::f16),
                  ov::hint::model_priority(ov::hint::Priority::HIGH),
                  ov::hint::execution_mode(ov::hint::ExecutionMode::PERFORMANCE),
                  ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY)};

        // Model: out = input + constant, both operands share one static shape.
        const ov::Shape shape{1, 2, 10, 10};
        auto input1 = std::make_shared<ov::op::v0::Parameter>(element_type, shape);
        auto constant = ov::op::v0::Constant::create(element_type, shape, {1});
        auto add = std::make_shared<ov::op::v1::Add>(input1, constant);
        fn_ptr = std::make_shared<ov::Model>(ov::NodeVector{add}, ov::ParameterVector{input1});
    }
};

// Runs the fixture's model twice on the same input data — first with regular
// host tensors, then with remote tensors created from the compiled model's
// OpenCL context — and checks that both runs produce the same output for the
// parametrized element type.
TEST_P(OVRemoteTensorDataType_Test, smoke_RemoteTensorDataType) {
#if defined(ANDROID)
    GTEST_SKIP();
#endif
    // Force the model output to keep the parametrized element type.
    auto ppp = ov::preprocess::PrePostProcessor(fn_ptr);
    ppp.output(0).tensor().set_element_type(element_type);
    auto ov_model = ppp.build();

    auto core = ov::Core();
    ov::CompiledModel compiled_model = core.compile_model(ov_model, deviceName, config);

    // regular inference
    auto inf_req = compiled_model.create_infer_request();
    auto input_element_type = inf_req.get_input_tensor(0).get_element_type();
    auto input_shape = inf_req.get_input_tensor(0).get_shape();
    auto output_element_type = inf_req.get_output_tensor(0).get_element_type();
    auto output_shape = inf_req.get_output_tensor(0).get_shape();

    // The plugin must expose the requested type as-is on both ports.
    ASSERT_EQ(input_element_type, element_type);
    ASSERT_EQ(output_element_type, element_type);

    auto remote_context = compiled_model.get_context().as<ov::intel_gpu::ocl::ClContext>();
    auto input_tensor = ov::test::utils::create_and_fill_tensor(input_element_type, input_shape);
    auto output_tensor = ov::test::utils::create_and_fill_tensor(output_element_type, output_shape);

    auto input_cl_tensor = remote_context.create_tensor(input_element_type, input_shape);
    auto output_cl_tensor = remote_context.create_tensor(output_element_type, output_shape);

    // Feed the remote input with exactly the same data as the host input.
    input_cl_tensor.copy_from(input_tensor);

    // Reference run with host tensors.
    inf_req.set_input_tensor(0, input_tensor);
    inf_req.set_output_tensor(0, output_tensor);
    inf_req.infer();

    // Run under test with remote tensors.
    inf_req.set_input_tensor(0, input_cl_tensor);
    inf_req.set_output_tensor(0, output_cl_tensor);
    inf_req.infer();

    // Bring the remote result back to host memory for comparison.
    auto tmp_tensor = ov::Tensor(output_element_type, output_shape);
    output_cl_tensor.copy_to(tmp_tensor);

    if (element_type == ov::element::i16) {
        compare_data<ov::element_type_traits<ov::element::i16>::value_type>(output_tensor, tmp_tensor);
    } else if (element_type == ov::element::u16) {
        compare_data<ov::element_type_traits<ov::element::u16>::value_type>(output_tensor, tmp_tensor);
    } else if (element_type == ov::element::u32) {
        compare_data<ov::element_type_traits<ov::element::u32>::value_type>(output_tensor, tmp_tensor);
    } else {
        // Without this branch, a type added to the test instantiation but not
        // to the chain above would pass without comparing any output.
        FAIL() << "No output comparison implemented for element type " << element_type;
    }
}

// Instantiates OVRemoteTensorDataType_Test once per element type listed below
// (i16, u16, u32); test names are generated by getTestCaseName.
INSTANTIATE_TEST_SUITE_P(smoke_RemoteTensorDataType, OVRemoteTensorDataType_Test,
::testing::Combine(::testing::Values(ov::element::Type_t::i16,
ov::element::Type_t::u16,
ov::element::Type_t::u32)),
OVRemoteTensorDataType_Test::getTestCaseName);
Loading