[GPU] Add support for i16, u16, and u32 element types in remote tensors (#27573)

### Details:
- *Removed host-memory data conversion for user input/output tensors
with data types i16, u16, or u32.*
- *User tensors can now be directly used as plugin tensors without
additional data conversion overhead.*

### Tickets:
 - *156709*
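
For illustration, a minimal sketch of what this enables from user code (adapted from the tests added below; `model` is assumed to be a network with i16 inputs and outputs):

```cpp
// Sketch: with this change, an i16 host tensor is bound as-is; the GPU
// plugin no longer converts it to f32 on the host first.
ov::Core core;
auto compiled = core.compile_model(model, "GPU");
auto request = compiled.create_infer_request();

ov::Tensor input(ov::element::i16, {1, 2, 10, 10});
request.set_input_tensor(0, input);          // used directly as the plugin tensor
request.infer();
auto output = request.get_output_tensor(0);  // stays i16, no conversion overhead
```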
jade-cho authored Nov 19, 2024
1 parent 5bde1ab commit ea36b83
Showing 7 changed files with 139 additions and 12 deletions.
@@ -64,11 +64,8 @@ inline cldnn::layout make_layout(const ov::element::Type type, const ov::Shape&
 inline ov::element::Type convert_to_supported_device_type(ov::element::Type et) {
     switch (et) {
     case ov::element::f64:
-    case ov::element::i16:
-    case ov::element::u16:
         return ov::element::f32;
     case ov::element::u64:
-    case ov::element::u32:
         return ov::element::i32;
     default: return et;
     }
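Effect of the hunk above, sketched as assertions: i16, u16, and u32 now fall through to the default case instead of being widened on the host (illustrative checks, not code from the PR):

```cpp
#include <cassert>

// Expected mapping after this change (illustrative, not PR code).
void check_device_type_mapping() {
    assert(convert_to_supported_device_type(ov::element::i16) == ov::element::i16);  // was f32
    assert(convert_to_supported_device_type(ov::element::u16) == ov::element::u16);  // was f32
    assert(convert_to_supported_device_type(ov::element::u32) == ov::element::u32);  // was i32
    assert(convert_to_supported_device_type(ov::element::f64) == ov::element::f32);  // unchanged
    assert(convert_to_supported_device_type(ov::element::u64) == ov::element::i32);  // unchanged
}
```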
@@ -10,17 +10,23 @@ ParamsKey ReorderKernelRef::GetSupportedKey() const {
     ParamsKey k;
     k.EnableInputDataType(Datatype::BF16);
     k.EnableInputDataType(Datatype::UINT8);
+    k.EnableInputDataType(Datatype::UINT16);
+    k.EnableInputDataType(Datatype::UINT32);
     k.EnableInputDataType(Datatype::INT8);
+    k.EnableInputDataType(Datatype::INT16);
     k.EnableInputDataType(Datatype::INT32);
     k.EnableInputDataType(Datatype::INT64);
     k.EnableInputDataType(Datatype::F16);
     k.EnableInputDataType(Datatype::F32);
     k.EnableOutputDataType(Datatype::F16);
     k.EnableOutputDataType(Datatype::F32);
     k.EnableOutputDataType(Datatype::INT8);
+    k.EnableOutputDataType(Datatype::INT16);
     k.EnableOutputDataType(Datatype::INT32);
     k.EnableOutputDataType(Datatype::INT64);
     k.EnableOutputDataType(Datatype::UINT8);
+    k.EnableOutputDataType(Datatype::UINT16);
+    k.EnableOutputDataType(Datatype::UINT32);
     k.EnableOutputDataType(Datatype::BF16);
     k.EnableSurfaceInputSupport();
     k.EnableDifferentTypes();
1 change: 1 addition & 0 deletions src/plugins/intel_gpu/src/plugin/common_utils.cpp
@@ -236,6 +236,7 @@ void convert_and_copy(const ov::ITensor* src, ov::ITensor* dst, const cldnn::str
         tmp_tensor = ov::Tensor(dst_et, src->get_shape());
         ::convert_and_copy(src_ptr, src_et, tmp_tensor.data(), dst_et, size, cldnn::layout({}, ov::element::undefined, cldnn::format::bfyx, cldnn::padding()));
         remote->copy_from(get_tensor_impl(tmp_tensor)._ptr);
+        return;
     } else {
         dst_ptr = dst->data();
     }
3 changes: 2 additions & 1 deletion src/plugins/intel_gpu/src/plugin/ops/parameter.cpp
@@ -29,7 +29,8 @@ static void CreateParameterOp(ProgramBuilder& p, const std::shared_ptr<ov::op::v
     }
 
     cldnn::format input_format = cldnn::format::get_default_format(input_pshape.size());
-    auto element_type = cldnn::element_type_to_data_type(convert_to_supported_device_type(op->get_output_element_type(0)));
+    auto element_type = convert_to_supported_device_type(op->get_output_element_type(0));
+    element_type = element_type == ov::element::boolean ? ov::element::u8 : element_type;
 
     // look at the expected color format of this input
     auto input_name = layer_type_name_ID(op);
3 changes: 2 additions & 1 deletion src/plugins/intel_gpu/src/plugin/ops/result.cpp
@@ -30,7 +30,8 @@ static void CreateResultOp(ProgramBuilder& p, const std::shared_ptr<ov::op::v0::
     auto out_format = cldnn::format::get_default_format(out_rank);
 
     auto out_primitive_name = layer_type_name_ID(op);
-    auto out_data_type = cldnn::element_type_to_data_type(convert_to_supported_device_type(op->get_input_element_type(0)));
+    auto out_data_type = convert_to_supported_device_type(op->get_input_element_type(0));
+    out_data_type = out_data_type == ov::element::boolean ? ov::element::u8 : out_data_type;
 
     auto reorder_primitive = cldnn::reorder(out_primitive_name,
                                             inputs[0],
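parameter.cpp and result.cpp above now share the same pattern: keep the ov::element::Type returned by convert_to_supported_device_type and remap only boolean to u8, the GPU plugin's storage type for booleans. A sketch of a helper this pattern could be factored into (hypothetical name, not part of the PR):

```cpp
// Hypothetical helper capturing the pattern duplicated in parameter.cpp and
// result.cpp; `to_gpu_element_type` does not exist in the PR.
inline ov::element::Type to_gpu_element_type(const ov::element::Type& et) {
    auto converted = convert_to_supported_device_type(et);
    // Booleans are stored as 8-bit unsigned values on the GPU.
    return converted == ov::element::boolean ? ov::element::u8 : converted;
}
```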
45 changes: 38 additions & 7 deletions src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp
@@ -82,6 +82,18 @@ inline bool all_host_tensors(const std::vector<ov::SoPtr<ov::ITensor>>& tensors)
     });
 }
 
+cldnn::data_types data_type_for_remote_tensor(ov::element::Type t) {
+    switch (t) {
+    case ov::element::Type_t::f64:
+        return cldnn::data_types::f32;
+    case ov::element::Type_t::u64:
+        return cldnn::data_types::i32;
+    case ov::element::Type_t::boolean:
+        return cldnn::data_types::u8;
+    default: return t;
+    }
+}
+
 }  // namespace
 
 namespace ov {
@@ -446,6 +458,21 @@ void SyncInferRequest::wait() {
                     iremote_tensor_ptr->copy_from(plugin_tensor.ptr);
                 }
             }
+        } else if (!is_dynamic && is_remote_tensor_impl && output_memory) {
+            auto& stream = m_graph->get_network()->get_stream();
+            auto user_mem = remote_tensor_impl_ptr->get_original_memory();
+            if (user_mem->get_allocation_type() == cldnn::allocation_type::cl_mem
+                && output_memory->get_allocation_type() != cldnn::allocation_type::cl_mem) {
+                auto plugin_tensor = m_plugin_outputs.at(port_idx);
+                if (is_convert_required(plugin_tensor.ptr->get_element_type(), iremote_tensor_ptr->get_element_type())) {
+                    auto& stream = m_graph->get_network()->get_stream();
+                    convert_and_copy(plugin_tensor.ptr.get(), iremote_tensor_ptr.get(), stream);
+                } else {
+                    iremote_tensor_ptr->copy_from(plugin_tensor.ptr);
+                }
+            } else {
+                copy_events.push_back(output_memory->copy_to(stream, *user_mem, false));
+            }
         } else if (is_remote_tensor_impl && is_dynamic) {
             auto& stream = m_graph->get_network()->get_stream();
             auto user_mem = remote_tensor_impl_ptr->get_original_memory();
@@ -522,7 +549,7 @@ std::shared_ptr<ov::ITensor> SyncInferRequest::create_device_tensor(const ov::Pa

     return std::make_shared<RemoteTensorImpl>(m_context,
                                               get_tensor_shape(port_shape),
-                                              cldnn::element_type_to_data_type(element_type),
+                                              ::data_type_for_remote_tensor(element_type),
                                               tensor_type);
 }
 
@@ -553,7 +580,7 @@ TensorWrapper SyncInferRequest::create_or_share_device_tensor(const TensorWrappe
     } else if (usm_host_raw_ptr && can_share) {
         return { std::make_shared<RemoteTensorImpl>(m_context,
                                                     user_tensor->get_shape(),
-                                                    cldnn::element_type_to_data_type(element_type),
+                                                    ::data_type_for_remote_tensor(element_type),
                                                     TensorType::BT_USM_SHARED,
                                                     user_tensor->data()), TensorOwner::USER };
     }
@@ -785,16 +812,16 @@ std::vector<cldnn::event::ptr> SyncInferRequest::prepare_input(const std::string
     if (is_remote_tensor_impl) {
         if (convert_needed) {
             m_plugin_inputs[input_idx] = { create_device_tensor(pshape,
-                                                                cldnn::element_type_to_data_type(element_type),
+                                                                ::data_type_for_remote_tensor(element_type),
                                                                 false), TensorOwner::PLUGIN };
         } else {
             m_plugin_inputs[input_idx] = user_tensor_wrapper;
         }
     } else if (is_usm_host_tensor && !convert_needed && can_use_usm_host(engine)) {
-        if (element_type != cldnn::element_type_to_data_type(element_type)) {
+        if (element_type != ::data_type_for_remote_tensor(element_type)) {
             m_plugin_inputs[input_idx] = { std::make_shared<RemoteTensorImpl>(m_context,
                                                                               user_tensor->get_shape(),
-                                                                              cldnn::element_type_to_data_type(element_type),
+                                                                              ::data_type_for_remote_tensor(element_type),
                                                                               TensorType::BT_USM_SHARED,
                                                                               user_tensor->data()), TensorOwner::USER };
         } else {
@@ -953,8 +980,12 @@ std::vector<cldnn::event::ptr> SyncInferRequest::prepare_output(size_t output_id
                                 is_generic_remote ||
                                 (m_plugin_outputs[output_idx].owner == TensorOwner::USER && !is_remote_tensor_impl);
     if (update_device_tensor) {
-        m_plugin_outputs[output_idx] =
-            create_or_share_device_tensor(user_tensor_wrapper, internal_name, pshape, device_tensor_et, need_lockable_mem || convert_needed);
+        if (!is_remote_tensor_impl) {
+            m_plugin_outputs[output_idx] =
+                create_or_share_device_tensor(user_tensor_wrapper, internal_name, pshape, device_tensor_et, need_lockable_mem || convert_needed);
+        } else {
+            m_plugin_outputs[output_idx] = { create_device_tensor(pshape, device_tensor_et, need_lockable_mem || convert_needed), TensorOwner::PLUGIN };
+        }
     }
 }
 
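The new branch in wait() above handles a user output bound as a cl_mem-backed remote tensor while the plugin's output lives in a different allocation type: it converts through the plugin tensor when needed, and otherwise enqueues a direct device-side copy. A minimal sketch of binding such an output, mirroring the tests below (`compiled_model`, `infer_request`, and `output_shape` are assumed to exist):

```cpp
// Sketch: bind a cl_mem-backed remote tensor as the output so the new
// copy path in SyncInferRequest::wait() is exercised; error handling omitted.
auto cl_context = compiled_model.get_context().as<ov::intel_gpu::ocl::ClContext>();
auto output_cl_tensor = cl_context.create_tensor(ov::element::u32, output_shape);  // cl buffer
infer_request.set_output_tensor(0, output_cl_tensor);
infer_request.infer();  // wait() converts via the plugin tensor or enqueues a device copy
```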
@@ -2873,3 +2873,93 @@ TEST(RemoteTensor, smoke_CanSetRoiRemoteTensor) {

     compare_tensors(output_tensor_copy_0, output_tensor_copy_1);
 }
+
+
+using RemoteTensorDataTypesOptionsParams = std::tuple<ov::element::Type_t>;
+class OVRemoteTensorDataType_Test : public OVRemoteTensor_Test,
+                                    public testing::WithParamInterface<RemoteTensorDataTypesOptionsParams> {
+protected:
+    std::shared_ptr<ov::Model> fn_ptr;
+    std::string deviceName;
+    ov::AnyMap config;
+    ov::element::Type_t element_type;
+
+public:
+    void SetUp() override {
+        deviceName = ov::test::utils::DEVICE_GPU;
+        std::tie(element_type) = this->GetParam();
+        config = {ov::hint::inference_precision(ov::element::f16),
+                  ov::hint::model_priority(ov::hint::Priority::HIGH),
+                  ov::hint::execution_mode(ov::hint::ExecutionMode::PERFORMANCE),
+                  ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY)};
+
+        auto input1 = std::make_shared<ov::op::v0::Parameter>(element_type, ov::Shape{1, 2, 10, 10});
+        auto constant = ov::op::v0::Constant::create(element_type, ov::Shape{1, 2, 10, 10}, {1});
+        auto add = std::make_shared<ov::op::v1::Add>(input1, constant);
+        fn_ptr = std::make_shared<ov::Model>(ov::NodeVector{add}, ov::ParameterVector{input1});
+    }
+    static std::string getTestCaseName(const testing::TestParamInfo<RemoteTensorDataTypesOptionsParams>& obj) {
+        ov::element::Type_t elem_type;
+        std::tie(elem_type) = obj.param;
+
+        std::ostringstream result;
+        result << "OVRemoteTensorTest_" << elem_type;
+        return result.str();
+    }
+};
+
+TEST_P(OVRemoteTensorDataType_Test, smoke_RemoteTensorDataType) {
+#if defined(ANDROID)
+    GTEST_SKIP();
+#endif
+    auto ppp = ov::preprocess::PrePostProcessor(fn_ptr);
+    ppp.output(0).tensor().set_element_type(element_type);
+    auto ov_model = ppp.build();
+
+    auto core = ov::Core();
+    ov::CompiledModel compiled_model = core.compile_model(ov_model, deviceName, config);
+
+    // regular inference
+    auto inf_req = compiled_model.create_infer_request();
+    auto input_element_type = inf_req.get_input_tensor(0).get_element_type();
+    auto input_shape = inf_req.get_input_tensor(0).get_shape();
+    auto output_element_type = inf_req.get_output_tensor(0).get_element_type();
+    auto output_shape = inf_req.get_output_tensor(0).get_shape();
+
+    ASSERT_EQ(input_element_type, element_type);
+    ASSERT_EQ(output_element_type, element_type);
+
+    auto remote_context = compiled_model.get_context().as<ov::intel_gpu::ocl::ClContext>();
+    auto input_tensor = ov::test::utils::create_and_fill_tensor(input_element_type, input_shape);
+    auto output_tensor = ov::test::utils::create_and_fill_tensor(output_element_type, output_shape);
+
+    auto input_cl_tensor = remote_context.create_tensor(input_element_type, input_shape);
+    auto output_cl_tensor = remote_context.create_tensor(output_element_type, output_shape);
+
+    input_cl_tensor.copy_from(input_tensor);
+
+    inf_req.set_input_tensor(0, input_tensor);
+    inf_req.set_output_tensor(0, output_tensor);
+    inf_req.infer();
+
+    inf_req.set_input_tensor(0, input_cl_tensor);
+    inf_req.set_output_tensor(0, output_cl_tensor);
+    inf_req.infer();
+
+    auto tmp_tensor = ov::Tensor(output_element_type, output_shape);
+    output_cl_tensor.copy_to(tmp_tensor);
+
+    if (element_type == ov::element::i16) {
+        compare_data<ov::element_type_traits<ov::element::i16>::value_type>(output_tensor, tmp_tensor);
+    } else if (element_type == ov::element::u16) {
+        compare_data<ov::element_type_traits<ov::element::u16>::value_type>(output_tensor, tmp_tensor);
+    } else if (element_type == ov::element::u32) {
+        compare_data<ov::element_type_traits<ov::element::u32>::value_type>(output_tensor, tmp_tensor);
+    }
+}
+
+INSTANTIATE_TEST_SUITE_P(smoke_RemoteTensorDataType, OVRemoteTensorDataType_Test,
+                         ::testing::Combine(::testing::Values(ov::element::Type_t::i16,
+                                                              ov::element::Type_t::u16,
+                                                              ov::element::Type_t::u32)),
+                         OVRemoteTensorDataType_Test::getTestCaseName);
