Dynamic shapes build 24.02 (#72)

* Revert "Revert "Add support for models with dynamic shapes (#69)" (#70)" This reverts commit 25934f6. * Fix building after style fixes
triton-inference-server · Mar 13, 2024 · 18bbf09 · 18bbf09
1 parent 25934f6
commit 18bbf09
Show file tree

Hide file tree

Showing 4 changed files with 87 additions and 53 deletions.
diff --git a/README.md b/README.md
@@ -137,7 +137,9 @@ and
 is provided, then `dynamic_batching` will be enabled with default settings.
 
 
-### Examples of the "config.pbtxt" files depending on the use case
+## Examples of the "config.pbtxt" file sections depending on the use case
+
+### Latency mode
 
 Latency mode with low concurrency on the client side. Recommended for performance optimization with low number of parallel clients.
 ```
@@ -157,6 +159,8 @@ parameters: [
 ]
 ```
 
+### Throughput mode
+
 Throughput mode with high concurrency on the client side. Recommended for throughput optimization with high number of parallel clients.
 The number of streams should be lower than or equal to the number of parallel clients and lower than or equal to the number of CPU cores.
 For example, with ~20 clients on the host with 12 CPU cores, the config could be like:
@@ -177,19 +181,14 @@ parameters: [
 ]
 ```
 
+### Loading non default model format
+
 When loading a model in a format other than the default one (Intermediate Representation with the name model.xml), use an extra parameter "default_model_filename".
 For example, for the TensorFlow saved_model format use:
 ```
 default_model_filename: "model.saved_model"
-parameters: [
-{
-   key: "PERFORMANCE_HINT"
-   value: {
-     string_value: "LATENCY"
-   }
-}
-]
 ```
+
 and copy the model to the subfolder called "model.saved_model"
 ```
 model_repository/
@@ -201,12 +200,38 @@ model_repository/
     └── config.pbtxt
 
 ```
+Other allowed values are `model.pdmodel` or `model.onnx`.
+### Reshaping models
 
+The following section shows how to use OpenVINO dynamic shapes. `-1` denotes a dimension accepting any value on input. In this case,
+while the model originally accepted input with layout `NCHW` and shape `(1,3,224,224)`, it now accepts any batch size and resolution.
 
-## Known Issues
+*Note*: If the model is originally exported with dynamic shapes, there is no need to manually specify dynamic shapes in config.
 
-* Models with dynamic shape are not supported in this backend now.
+```
+input [
+  {
+    name: "input"
+    data_type: TYPE_FP32
+    dims: [ -1, 3, -1, -1]
+  }
+]
+output [
+  {
+    name: "output"
+    data_type: TYPE_FP32
+    dims: [ -1, 1001]
+  }
+]
+parameters: {
+key: "RESHAPE_IO_LAYERS"
+value: {
+string_value:"yes"
+}
+}
+```
 
-* As of now, the Openvino backend does not support variable shaped tensors. However, the dynamic batch sizes in the model are supported. See `SKIP_OV_DYNAMIC_BATCHSIZE` and `ENABLE_BATCH_PADDING` parameters for more details.
+## Known Issues
 
 * Models with a scalar on the input (shape without any dimension) are not supported.
+* Reshaping using [dimension ranges](https://docs.openvino.ai/2023.3/ovms_docs_dynamic_shape_dynamic_model.html) is not supported.
diff --git a/src/openvino.cc b/src/openvino.cc
@@ -554,24 +554,32 @@ ModelState::ValidateInputs(const size_t expected_input_cnt)
     }
 
     ov::Shape input_shape;
+    ov::PartialShape partial_input_shape;
     RETURN_IF_OPENVINO_ASSIGN_ERROR(
-        input_shape,
-        model_inputs[model_inputs_name_to_index[io_name]].get_shape(),
+        partial_input_shape,
+        model_inputs[model_inputs_name_to_index[io_name]].get_partial_shape(),
         ("retrieving original shapes from input " + io_name).c_str());
-
     if (reshape_io_layers_) {
       int index = (MaxBatchSize() != 0) ? 1 : 0;
       for (const auto dim : dims) {
-        input_shape[index++] = dim;
+        if (dim > 0) {
+          partial_input_shape[index++] = ov::Dimension(dim);
+        } else if (dim == -1) {
+          partial_input_shape[index++] = ov::Dimension::dynamic();
+        } else {
+          return TRITONSERVER_ErrorNew(
+              TRITONSERVER_ERROR_INTERNAL,
+              "openvino backend does not support dimensions values"
+              " other than `-1` or positive integers");
+        }
       }
       RETURN_IF_OPENVINO_ERROR(
-          ppp.input(io_name).tensor().set_shape(input_shape),
+          ppp.input(io_name).tensor().set_shape(partial_input_shape),
           std::string("setting shape for " + io_name).c_str());
     } else {
       RETURN_IF_ERROR(CompareDimsSupported(
-          Name(), io_name,
-          std::vector<size_t>(input_shape.begin(), input_shape.end()), dims,
-          MaxBatchSize(), false /* compare_exact */));
+          Name(), io_name, partial_input_shape, dims, MaxBatchSize(),
+          false /* compare_exact */));
     }
 
     if (MaxBatchSize()) {
@@ -645,15 +653,14 @@ ModelState::ValidateOutputs()
     } else {
       RETURN_IF_ERROR(ParseShape(io, "dims", &dims));
     }
-    ov::Shape output_shape;
+    ov::PartialShape output_shape;
     RETURN_IF_OPENVINO_ASSIGN_ERROR(
         output_shape,
-        model_outputs[model_outputs_name_to_index[io_name]].get_shape(),
+        model_outputs[model_outputs_name_to_index[io_name]].get_partial_shape(),
         ("retrieving original shapes from output " + io_name).c_str());
     RETURN_IF_ERROR(CompareDimsSupported(
-        Name(), io_name,
-        std::vector<size_t>(output_shape.begin(), output_shape.end()), dims,
-        MaxBatchSize(), true /* compare_exact */));
+        Name(), io_name, output_shape, dims, MaxBatchSize(),
+        true /* compare_exact */));
   }
 
   // Model preprocessing
@@ -812,17 +819,18 @@ ModelState::AutoCompleteInputOrOutput(
           "data_type",
           OpenVINOElementToModelConfigDataType(ov_io.get_element_type())));
       // Find shape
-      ov::Shape io_shape;
+      ov::PartialShape io_shape;
       RETURN_IF_OPENVINO_ASSIGN_ERROR(
-          io_shape, ov_io.get_shape(),
+          io_shape, ov_io.get_partial_shape(),
           ("retrieving original shapes from" + std::string(io_json_obj_name) +
            " " + io_name)
               .c_str());
       // Populate dims
       triton::common::TritonJson::Value dims(
           ModelConfig(), triton::common::TritonJson::ValueType::ARRAY);
       for (size_t i = (MaxBatchSize() > 0) ? 1 : 0; i < io_shape.size(); i++) {
-        RETURN_IF_ERROR(dims.AppendInt(io_shape[i]));
+        RETURN_IF_ERROR(dims.AppendInt(
+            io_shape.is_static() ? io_shape[i].get_length() : -1));
       }
       RETURN_IF_ERROR(io_json.Add("dims", std::move(dims)));
       // Add individual input/output to new input/output
@@ -842,7 +850,6 @@ ModelState::AutoCompleteInputOrOutput(
          "': " + io_json_obj_name + " already specified")
             .c_str());
   }
-
   return nullptr;  // success
 }
 
@@ -931,7 +938,7 @@ ModelInstanceState::ModelInstanceState(
     throw triton::backend::BackendModelInstanceException(TRITONSERVER_ErrorNew(
         TRITONSERVER_ERROR_INVALID_ARG,
         (std::string("unable to load model '") + model_state_->Name() +
-         "', openVINO backend supports only CPU device")
+         "', Triton openVINO backend supports only CPU device")
             .c_str()));
   }
 
@@ -1363,7 +1370,6 @@ TRITONBACKEND_Initialize(TRITONBACKEND_Backend* backend)
   const char* cname;
   RETURN_IF_ERROR(TRITONBACKEND_BackendName(backend, &cname));
   std::string name(cname);
-
   LOG_MESSAGE(
       TRITONSERVER_LOG_INFO,
       (std::string("TRITONBACKEND_Initialize: ") + name).c_str());

diff --git a/src/openvino_utils.cc b/src/openvino_utils.cc
@@ -195,24 +195,25 @@ OpenVINOElementToModelConfigDataType(const ov::element::Type& data_type)
   return "TYPE_INVALID";
 }
 
+// Returns true when `config_dim`, a dimension taken from the Triton model
+// configuration, is compatible with the OpenVINO model dimension `ov_dim`.
+//   - A static model dimension must be equal to `config_dim`.
+//   - A dynamic model dimension accepts `-1` and any value inside its
+//     [min, max] interval. Both bounds are inclusive; when the interval
+//     has no upper bound only the minimum is enforced.
+static bool
+doesMatch(const ov::Dimension& ov_dim, int64_t config_dim)
+{
+  if (ov_dim.is_static()) {
+    return ov_dim.get_length() == config_dim;
+  }
+  // ConvertToSignedShape reports every dynamic dimension as -1, so a
+  // configured -1 has to match a dynamic dimension regardless of bounds.
+  if (config_dim == -1) {
+    return true;
+  }
+  if (config_dim < ov_dim.get_min_length()) {
+    return false;
+  }
+  // The interval bounds are valid lengths themselves, hence `<=`.
+  return !ov_dim.get_interval().has_upper_bound() ||
+         (config_dim <= ov_dim.get_max_length());
+}
+
 TRITONSERVER_Error*
 CompareDimsSupported(
     const std::string& model_name, const std::string& tensor_name,
-    const std::vector<size_t>& model_shape, const std::vector<int64_t>& dims,
+    const ov::PartialShape& model_shape, const std::vector<int64_t>& dims,
     const int max_batch_size, const bool compare_exact)
 {
-  // TODO: OpenVINO backend does not support the dynamic shapes as of now.
-  // We can use RESIZE_BILINEAR preProcess in InputInfo to support dynamic
-  // shapes in future.
-  for (const auto& dim : dims) {
-    RETURN_ERROR_IF_TRUE(
-        (dim == -1), TRITONSERVER_ERROR_INVALID_ARG,
-        std::string("model '") + model_name + "', tensor '" + tensor_name +
-            "': provides -1 dim (shape " + ShapeToString(dims) +
-            "), openvino "
-            "currently does not support dynamic shapes.");
-  }
-
   // If the model configuration expects batching support in the model,
   // then the openvino first dimension will be reshaped hence should not
   // be compared.
@@ -232,9 +233,8 @@ CompareDimsSupported(
     bool succ = (model_shape.size() == (size_t)full_dims.size());
     if (succ) {
       for (size_t i = 0; i < full_dims.size(); ++i) {
-        const int64_t model_dim = model_shape[i];
-        if (compare_exact || (i != 0)) {
-          succ &= (model_dim == full_dims[i]);
+        if (compare_exact || ((i != 0) && (full_dims[i] != -1))) {
+          succ &= doesMatch(model_shape[i], full_dims[i]);
         }
       }
     }
@@ -256,8 +256,7 @@ CompareDimsSupported(
     bool succ = (model_shape.size() == dims.size());
     if (succ) {
       for (size_t i = 0; i < dims.size(); ++i) {
-        const int64_t model_dim = model_shape[i];
-        succ &= (model_dim == dims[i]);
+        succ &= doesMatch(model_shape[i], dims[i]);
       }
     }
 
@@ -289,9 +288,13 @@ ReadParameter(
 }
 
 std::vector<int64_t>
-ConvertToSignedShape(const std::vector<size_t> shape)
+ConvertToSignedShape(const ov::PartialShape& shape)
 {
-  return std::vector<int64_t>{shape.begin(), shape.end()};
+  // Converts an OpenVINO partial shape into signed dims: static
+  // dimensions keep their length, dynamic dimensions become -1.
+  std::vector<int64_t> out;
+  // One entry is produced per dimension, so allocate once up front.
+  out.reserve(shape.size());
+  for (const auto& dim : shape) {
+    out.emplace_back(dim.is_static() ? dim.get_length() : -1);
+  }
+  return out;
 }
 
 }}}  // namespace triton::backend::openvino
diff --git a/src/openvino_utils.h b/src/openvino_utils.h
@@ -92,13 +92,13 @@ std::string OpenVINOElementToModelConfigDataType(
 
 TRITONSERVER_Error* CompareDimsSupported(
     const std::string& model_name, const std::string& tensor_name,
-    const std::vector<size_t>& model_shape, const std::vector<int64_t>& dims,
+    const ov::PartialShape& model_shape, const std::vector<int64_t>& dims,
     const int max_batch_size, const bool compare_exact);
 
 TRITONSERVER_Error* ReadParameter(
     triton::common::TritonJson::Value& params, const std::string& key,
     std::string* param);
 
-std::vector<int64_t> ConvertToSignedShape(const std::vector<size_t> shape);
+std::vector<int64_t> ConvertToSignedShape(const ov::PartialShape& shape);
 
 }}}  // namespace triton::backend::openvino