Skip to content

Commit

Permalink
Dynamic shapes build 24.02 (#72)
Browse files Browse the repository at this point in the history
* Revert "Revert "Add support for models with dynamic shapes (#69)" (#70)"

This reverts commit 25934f6.

* Fix building after style fixes
  • Loading branch information
atobiszei authored Mar 13, 2024
1 parent 25934f6 commit 18bbf09
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 53 deletions.
49 changes: 37 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,9 @@ and
is provided, then `dynamic_batching` will be enabled with default settings.


### Examples of the "config.pbtxt" files depending on the use case
## Examples of the "config.pbtxt" file sections depending on the use case

### Latency mode

Latency mode with low concurrency on the client side. Recommended for performance optimization with low number of parallel clients.
```
Expand All @@ -157,6 +159,8 @@ parameters: [
]
```

### Throughput mode

Throughput mode with high concurrency on the client side. Recommended for throughput optimization with high number of parallel clients.
The number of streams should be lower than or equal to the number of parallel clients and lower than or equal to the number of CPU cores.
For example, with ~20 clients on the host with 12 CPU cores, the config could be like:
Expand All @@ -177,19 +181,14 @@ parameters: [
]
```

### Loading non default model format

When loading a model with a format other than the default Intermediate Representation with the name model.xml, use an extra parameter "default_model_filename".
For example, using TensorFlow saved_model format use:
```
default_model_filename: "model.saved_model"
parameters: [
{
key: "PERFORMANCE_HINT"
value: {
string_value: "LATENCY"
}
}
]
```

and copy the model to the subfolder called "model.saved_model"
```
model_repository/
Expand All @@ -201,12 +200,38 @@ model_repository/
└── config.pbtxt
```
Other allowed values are `model.pdmodel` or `model.onnx`.
### Reshaping models

The following section shows how to use OpenVINO dynamic shapes. `-1` denotes a dimension that accepts any value on input. In this case,
while the model originally accepted input with layout `NCHW` and shape `(1,3,224,224)`, it now accepts any batch size and resolution.

## Known Issues
*Note*: If the model is originally exported with dynamic shapes, there is no need to manually specify dynamic shapes in config.

* Models with dynamic shape are not supported in this backend now.
```
input [
{
name: "input"
data_type: TYPE_FP32
dims: [ -1, 3, -1, -1]
}
]
output [
{
name: "output"
data_type: TYPE_FP32
dims: [ -1, 1001]
}
]
parameters: {
key: "RESHAPE_IO_LAYERS"
value: {
string_value:"yes"
}
}
```

* As of now, the Openvino backend does not support variable shaped tensors. However, the dynamic batch sizes in the model are supported. See `SKIP_OV_DYNAMIC_BATCHSIZE` and `ENABLE_BATCH_PADDING` parameters for more details.
## Known Issues

* Models with a scalar on the input (shape without any dimension) are not supported.
* Reshaping using [dimension ranges](https://docs.openvino.ai/2023.3/ovms_docs_dynamic_shape_dynamic_model.html) is not supported.
44 changes: 25 additions & 19 deletions src/openvino.cc
Original file line number Diff line number Diff line change
Expand Up @@ -554,24 +554,32 @@ ModelState::ValidateInputs(const size_t expected_input_cnt)
}

ov::Shape input_shape;
ov::PartialShape partial_input_shape;
RETURN_IF_OPENVINO_ASSIGN_ERROR(
input_shape,
model_inputs[model_inputs_name_to_index[io_name]].get_shape(),
partial_input_shape,
model_inputs[model_inputs_name_to_index[io_name]].get_partial_shape(),
("retrieving original shapes from input " + io_name).c_str());

if (reshape_io_layers_) {
int index = (MaxBatchSize() != 0) ? 1 : 0;
for (const auto dim : dims) {
input_shape[index++] = dim;
if (dim > 0) {
partial_input_shape[index++] = ov::Dimension(dim);
} else if (dim == -1) {
partial_input_shape[index++] = ov::Dimension::dynamic();
} else {
return TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_INTERNAL,
"openvino backend does not support dimensions values"
" other than `-1` or positive integers");
}
}
RETURN_IF_OPENVINO_ERROR(
ppp.input(io_name).tensor().set_shape(input_shape),
ppp.input(io_name).tensor().set_shape(partial_input_shape),
std::string("setting shape for " + io_name).c_str());
} else {
RETURN_IF_ERROR(CompareDimsSupported(
Name(), io_name,
std::vector<size_t>(input_shape.begin(), input_shape.end()), dims,
MaxBatchSize(), false /* compare_exact */));
Name(), io_name, partial_input_shape, dims, MaxBatchSize(),
false /* compare_exact */));
}

if (MaxBatchSize()) {
Expand Down Expand Up @@ -645,15 +653,14 @@ ModelState::ValidateOutputs()
} else {
RETURN_IF_ERROR(ParseShape(io, "dims", &dims));
}
ov::Shape output_shape;
ov::PartialShape output_shape;
RETURN_IF_OPENVINO_ASSIGN_ERROR(
output_shape,
model_outputs[model_outputs_name_to_index[io_name]].get_shape(),
model_outputs[model_outputs_name_to_index[io_name]].get_partial_shape(),
("retrieving original shapes from output " + io_name).c_str());
RETURN_IF_ERROR(CompareDimsSupported(
Name(), io_name,
std::vector<size_t>(output_shape.begin(), output_shape.end()), dims,
MaxBatchSize(), true /* compare_exact */));
Name(), io_name, output_shape, dims, MaxBatchSize(),
true /* compare_exact */));
}

// Model preprocessing
Expand Down Expand Up @@ -812,17 +819,18 @@ ModelState::AutoCompleteInputOrOutput(
"data_type",
OpenVINOElementToModelConfigDataType(ov_io.get_element_type())));
// Find shape
ov::Shape io_shape;
ov::PartialShape io_shape;
RETURN_IF_OPENVINO_ASSIGN_ERROR(
io_shape, ov_io.get_shape(),
io_shape, ov_io.get_partial_shape(),
("retrieving original shapes from" + std::string(io_json_obj_name) +
" " + io_name)
.c_str());
// Populate dims
triton::common::TritonJson::Value dims(
ModelConfig(), triton::common::TritonJson::ValueType::ARRAY);
for (size_t i = (MaxBatchSize() > 0) ? 1 : 0; i < io_shape.size(); i++) {
RETURN_IF_ERROR(dims.AppendInt(io_shape[i]));
RETURN_IF_ERROR(dims.AppendInt(
io_shape.is_static() ? io_shape[i].get_length() : -1));
}
RETURN_IF_ERROR(io_json.Add("dims", std::move(dims)));
// Add individual input/output to new input/output
Expand All @@ -842,7 +850,6 @@ ModelState::AutoCompleteInputOrOutput(
"': " + io_json_obj_name + " already specified")
.c_str());
}

return nullptr; // success
}

Expand Down Expand Up @@ -931,7 +938,7 @@ ModelInstanceState::ModelInstanceState(
throw triton::backend::BackendModelInstanceException(TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_INVALID_ARG,
(std::string("unable to load model '") + model_state_->Name() +
"', openVINO backend supports only CPU device")
"', Triton openVINO backend supports only CPU device")
.c_str()));
}

Expand Down Expand Up @@ -1363,7 +1370,6 @@ TRITONBACKEND_Initialize(TRITONBACKEND_Backend* backend)
const char* cname;
RETURN_IF_ERROR(TRITONBACKEND_BackendName(backend, &cname));
std::string name(cname);

LOG_MESSAGE(
TRITONSERVER_LOG_INFO,
(std::string("TRITONBACKEND_Initialize: ") + name).c_str());
Expand Down
43 changes: 23 additions & 20 deletions src/openvino_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -195,24 +195,25 @@ OpenVINOElementToModelConfigDataType(const ov::element::Type& data_type)
return "TYPE_INVALID";
}

// Checks whether a dimension value taken from the model configuration is
// acceptable for the corresponding dimension of the OpenVINO model.
//
//  * Static model dimension: the config value must equal its length.
//  * Dynamic model dimension without an upper bound: every config value is
//    accepted (this is the branch a config value of -1 relies on).
//  * Dynamic model dimension with an upper bound: the config value must lie
//    within [min_length, max_length]. Both limits of an ov::Dimension
//    interval are inclusive, so a config value equal to either limit is a
//    legal size for the model and must be treated as a match.
//
// Returns true when the config dimension is compatible with the model
// dimension, false otherwise.
static bool
doesMatch(const ov::Dimension& ov_dim, int64_t config_dim)
{
  if (ov_dim.is_static()) {
    return ov_dim.get_length() == config_dim;
  }
  if (!ov_dim.get_interval().has_upper_bound()) {
    return true;
  }
  // Inclusive comparison: the boundary values themselves are valid sizes.
  return (config_dim >= ov_dim.get_min_length()) &&
         (config_dim <= ov_dim.get_max_length());
}

TRITONSERVER_Error*
CompareDimsSupported(
const std::string& model_name, const std::string& tensor_name,
const std::vector<size_t>& model_shape, const std::vector<int64_t>& dims,
const ov::PartialShape& model_shape, const std::vector<int64_t>& dims,
const int max_batch_size, const bool compare_exact)
{
// TODO: OpenVINO backend does not support the dynamic shapes as of now.
// We can use RESIZE_BILINEAR preProcess in InputInfo to support dynamic
// shapes in future.
for (const auto& dim : dims) {
RETURN_ERROR_IF_TRUE(
(dim == -1), TRITONSERVER_ERROR_INVALID_ARG,
std::string("model '") + model_name + "', tensor '" + tensor_name +
"': provides -1 dim (shape " + ShapeToString(dims) +
"), openvino "
"currently does not support dynamic shapes.");
}

// If the model configuration expects batching support in the model,
// then the openvino first dimension will be reshaped hence should not
// be compared.
Expand All @@ -232,9 +233,8 @@ CompareDimsSupported(
bool succ = (model_shape.size() == (size_t)full_dims.size());
if (succ) {
for (size_t i = 0; i < full_dims.size(); ++i) {
const int64_t model_dim = model_shape[i];
if (compare_exact || (i != 0)) {
succ &= (model_dim == full_dims[i]);
if (compare_exact || ((i != 0) && (full_dims[i] != -1))) {
succ &= doesMatch(model_shape[i], full_dims[i]);
}
}
}
Expand All @@ -256,8 +256,7 @@ CompareDimsSupported(
bool succ = (model_shape.size() == dims.size());
if (succ) {
for (size_t i = 0; i < dims.size(); ++i) {
const int64_t model_dim = model_shape[i];
succ &= (model_dim == dims[i]);
succ &= doesMatch(model_shape[i], dims[i]);
}
}

Expand Down Expand Up @@ -289,9 +288,13 @@ ReadParameter(
}

std::vector<int64_t>
ConvertToSignedShape(const std::vector<size_t> shape)
ConvertToSignedShape(const ov::PartialShape& shape)
{
return std::vector<int64_t>{shape.begin(), shape.end()};
std::vector<int64_t> out;
for (const auto& dim : shape) {
out.emplace_back(dim.is_static() ? dim.get_length() : -1);
}
return out;
}

}}} // namespace triton::backend::openvino
4 changes: 2 additions & 2 deletions src/openvino_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,13 +92,13 @@ std::string OpenVINOElementToModelConfigDataType(

TRITONSERVER_Error* CompareDimsSupported(
const std::string& model_name, const std::string& tensor_name,
const std::vector<size_t>& model_shape, const std::vector<int64_t>& dims,
const ov::PartialShape& model_shape, const std::vector<int64_t>& dims,
const int max_batch_size, const bool compare_exact);

TRITONSERVER_Error* ReadParameter(
triton::common::TritonJson::Value& params, const std::string& key,
std::string* param);

std::vector<int64_t> ConvertToSignedShape(const std::vector<size_t> shape);
std::vector<int64_t> ConvertToSignedShape(const ov::PartialShape& shape);

}}} // namespace triton::backend::openvino

0 comments on commit 18bbf09

Please sign in to comment.