Add ensemble parameter forwarding test (#6284)

triton-inference-server · Sep 11, 2023 · 41c689e · 41c689e
1 parent e0dc4aa
commit 41c689e
Show file tree

Hide file tree

Showing 4 changed files with 138 additions and 28 deletions.
diff --git a/qa/L0_parameters/model_repository/ensemble/config.pbtxt b/qa/L0_parameters/model_repository/ensemble/config.pbtxt
@@ -0,0 +1,68 @@
+# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+platform: "ensemble"
+max_batch_size: 0
+
+input [
+  {
+    name: "INPUT0"
+    data_type: TYPE_FP32
+    dims: [ 1 ]
+  }
+]
+
+output [
+  {
+    name: "key"
+    data_type: TYPE_STRING
+    dims: [ -1 ]
+  },
+  {
+    name: "value"
+    data_type: TYPE_STRING
+    dims: [ -1 ]
+  }
+]
+
+ensemble_scheduling
+{
+  step [
+    {
+      model_name: "identity"
+      model_version: -1
+      input_map { key: "INPUT0", value: "INPUT0" }
+      output_map { key: "OUTPUT0", value: "OUTPUT0" }
+    },
+    {
+      model_name: "parameter"
+      model_version: -1
+      input_map { key: "INPUT0", value: "OUTPUT0" }
+      output_map { key: "key", value: "key" }
+      output_map { key: "value", value: "value" }
+    }
+  ]
+}
diff --git a/qa/L0_parameters/model_repository/identity/config.pbtxt b/qa/L0_parameters/model_repository/identity/config.pbtxt
@@ -0,0 +1,44 @@
+# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+backend: "identity"
+max_batch_size: 0
+
+input [
+  {
+    name: "INPUT0"
+    data_type: TYPE_FP32
+    dims: [ 1 ]
+  }
+]
+
+output [
+  {
+    name: "OUTPUT0"
+    data_type: TYPE_FP32
+    dims: [ 1 ]
+  }
+]
diff --git a/qa/L0_parameters/parameters_test.py b/qa/L0_parameters/parameters_test.py
@@ -94,9 +94,20 @@ def create_inputs(self, client_type):
         inputs[0].set_data_from_numpy(np.asarray([1], dtype=np.float32))
         return inputs
 
-    async def send_request_and_verify(self, client_type, client, is_async=False):
+    async def send_request_and_verify(
+        self, client_type, client, is_async=False, model_name="parameter"
+    ):
         inputs = self.create_inputs(client_type)
         for parameters in self.parameter_list:
+            # Set up the infer callable once so it can be re-used below for brevity
+            infer_callable = partial(
+                client.infer,
+                model_name=model_name,
+                inputs=inputs,
+                parameters=parameters,
+                headers=self.headers,
+            )
+
             # The `triton_` prefix is reserved for Triton usage
             should_error = False
             if "triton_" in parameters.keys():
@@ -105,38 +116,17 @@ async def send_request_and_verify(self, client_type, client, is_async=False):
             if is_async:
                 if should_error:
                     with self.assertRaises(InferenceServerException):
-                        result = await client.infer(
-                            model_name="parameter",
-                            inputs=inputs,
-                            parameters=parameters,
-                            headers=self.headers,
-                        )
+                        await infer_callable()
                     return
                 else:
-                    result = await client.infer(
-                        model_name="parameter",
-                        inputs=inputs,
-                        parameters=parameters,
-                        headers=self.headers,
-                    )
-
+                    result = await infer_callable()
             else:
                 if should_error:
                     with self.assertRaises(InferenceServerException):
-                        result = client.infer(
-                            model_name="parameter",
-                            inputs=inputs,
-                            parameters=parameters,
-                            headers=self.headers,
-                        )
+                        infer_callable()
                     return
                 else:
-                    result = client.infer(
-                        model_name="parameter",
-                        inputs=inputs,
-                        parameters=parameters,
-                        headers=self.headers,
-                    )
+                    result = infer_callable()
 
             self.verify_outputs(result, parameters)
 
@@ -219,6 +209,9 @@ def test_grpc_stream_parameter(self):
             self.verify_outputs(result, parameters)
         self.grpc.stop_stream()
 
+    async def test_ensemble_parameter_forwarding(self):
+        await self.send_request_and_verify(httpclient, self.http, model_name="ensemble")
+
     async def asyncTearDown(self):
         self.http.close()
         self.grpc.close()

diff --git a/qa/L0_parameters/test.sh b/qa/L0_parameters/test.sh
@@ -45,6 +45,11 @@ SERVER=/opt/tritonserver/bin/tritonserver
 SERVER_LOG="./inference_server.log"
 source ../common/util.sh
 
+MODELDIR="model_repository"
+# Use the identity model as a dummy ensemble step to ensure parameters are forwarded through every step
+mkdir -p "${MODELDIR}/identity/1"
+mkdir -p "${MODELDIR}/ensemble/1"
+
 # TODO: Add support and testing for C++ client parameters:
 # https://jirasw.nvidia.com/browse/DLIS-4673
 
@@ -54,9 +59,9 @@ for i in {0..1}; do
   # TEST_HEADER is a parameter used by `parameters_test.py` that controls
   # whether the script will test for inclusion of headers in parameters or not.
   if [ $i == 1 ]; then
-    SERVER_ARGS="--model-repository=model_repository --exit-timeout-secs=120 --grpc-header-forward-pattern my_header.* --http-header-forward-pattern my_header.*"
+    SERVER_ARGS="--model-repository=${MODELDIR} --exit-timeout-secs=120 --grpc-header-forward-pattern my_header.* --http-header-forward-pattern my_header.*"
   else
-    SERVER_ARGS="--model-repository=model_repository --exit-timeout-secs=120"
+    SERVER_ARGS="--model-repository=${MODELDIR} --exit-timeout-secs=120"
   fi
   run_server
   if [ "$SERVER_PID" == "0" ]; then