
Adding support for tracing child models invoked from a BLS model #6063

Merged · 15 commits · Aug 7, 2023
56 changes: 50 additions & 6 deletions docs/user_guide/trace.md
@@ -427,11 +427,41 @@ The meaning of the trace timestamps is:

* BACKEND_OUTPUT: The tensor in the response of a backend.

## Tracing for BLS models

Triton does not collect traces for child models invoked from
[BLS](https://github.com/triton-inference-server/python_backend/tree/main#business-logic-scripting) models.

To include child models in collected traces, the user needs to provide the `trace`
argument (as shown in the example below) when constructing an InferenceRequest object.

```python

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
...
def execute(self, requests):
...
for request in requests:
...
# Create an InferenceRequest object. `model_name`,
# `requested_output_names`, and `inputs` are the required arguments and
# must be provided when constructing an InferenceRequest object. Make
# sure to replace `inputs` argument with a list of `pb_utils.Tensor`
# objects.
inference_request = pb_utils.InferenceRequest(
model_name='model_name',
requested_output_names=['REQUESTED_OUTPUT_1', 'REQUESTED_OUTPUT_2'],
          inputs=[<pb_utils.Tensor object>], trace=request.trace())

```
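
A complete, runnable version of this pattern is the `bls_simple` model added in
this PR; see `qa/python_models/bls_simple/bls_simple.py` below.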

## OpenTelemetry trace support

Triton provides an option to generate and export traces
for standalone and ensemble models
using [OpenTelemetry APIs and SDKs](https://opentelemetry.io/).
Triton provides an option to generate and export traces using
[OpenTelemetry APIs and SDKs](https://opentelemetry.io/).

To specify OpenTelemetry mode for tracing, specify the `--trace-config`
flag as follows:
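
As a minimal sketch (the documented example sits in lines collapsed from this
diff; the model repository path and OTLP endpoint below are illustrative, while
`mode=opentelemetry` and `opentelemetry,url` mirror the flags exercised in
`qa/L0_trace/test.sh`):

```bash
tritonserver --model-repository=/path/to/models \
    --trace-config=level=TIMESTAMPS \
    --trace-config=mode=opentelemetry \
    --trace-config=opentelemetry,url=localhost:4318/v1/traces
```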
@@ -477,16 +507,30 @@ The following table shows available OpenTelemetry trace APIs settings for
trace data.
</td>
</tr>
<tr>
<td><code>resource</code></td>
<td><code>Empty</code></td>
<td>
Key-value pairs to be used as resource attributes. <br/>
Should be specified following the provided template:<br/>
<code>--trace-config opentelemetry,resource=&lt;key&gt;=&lt;value&gt;</code><br/>
For example:<br/>
<code>--trace-config opentelemetry,resource=service.name=triton</code><br/>
<code>--trace-config opentelemetry,resource=service.version=1</code><br/>
Alternatively, key-value attributes can be specified through <br/>
<a href="https://opentelemetry.io/docs/concepts/sdk-configuration/general-sdk-configuration/#otel_resource_attributes">
OTEL_RESOURCE_ATTRIBUTES</a>
environment variable.
</td>
</tr>
Contributor:
I think Markdown has a defined syntax for generating tables:

|field1|field2|
|---|---|
|abc|s|

Contributor Author:
With the suggested syntax, line length goes above 80 characters. The above syntax still generates nice tables.

</tbody>
</table>
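
For instance, a minimal sketch of the environment-variable alternative
mentioned in the table above (attribute keys and values are illustrative):

```bash
# Equivalent to repeating --trace-config opentelemetry,resource=<key>=<value>
export OTEL_RESOURCE_ATTRIBUTES="service.name=triton,service.version=1"
```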


### Limitations

- OpenTelemetry trace mode is not supported on Windows systems.

- Tracing [BLS](https://github.com/triton-inference-server/python_backend/tree/main#business-logic-scripting)
models is not supported.

- Triton supports only
[OTLP/HTTP Exporter](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/protocol/otlp.md#otlphttp)
and allows specification of only url for this exporter through
51 changes: 40 additions & 11 deletions qa/L0_trace/opentelemetry_unittest.py
@@ -36,7 +36,7 @@
import tritonclient.grpc as grpcclient
import tritonclient.http as httpclient

EXPECTED_NUM_SPANS = 10
EXPECTED_NUM_SPANS = 16


class OpenTelemetryTest(tu.TestResultCollector):
@@ -55,12 +55,17 @@ def setUp(self):
entry.split("POST")[0] for entry in data if "resource_spans" in entry
]
self.spans = []
self.resource_attributes = []
for span in full_spans:
span = json.loads(span)
self.spans.append(span["resource_spans"][0]["scope_spans"][0]["spans"][0])
self.resource_attributes.append(
span["resource_spans"][0]["resource"]["attributes"]
)

self.simple_model_name = "simple"
self.ensemble_model_name = "ensemble_add_sub_int32_int32_int32"
self.bls_model_name = "bls_simple"
self.root_span = "InferRequest"

def _check_events(self, span_name, events):
@@ -137,16 +142,18 @@ def test_spans(self):
self._check_events(span_name, json.dumps(span["events"]))
parsed_spans.append(span_name)

# There should be 6 spans in total:
# 3 for http request, 3 for grpc request, 4 for ensemble
self.assertEqual(len(self.spans), 10)
# We should have 3 compute spans
self.assertEqual(parsed_spans.count("compute"), 3)
# 4 request spans (3 named simple - same as our model name, 1 ensemble)
self.assertEqual(parsed_spans.count(self.simple_model_name), 3)
self.assertEqual(parsed_spans.count(self.ensemble_model_name), 1)
# 3 root spans
self.assertEqual(parsed_spans.count(self.root_span), 3)
# There should be 16 spans in total:
# 3 for http request, 3 for grpc request, 4 for ensemble, 6 for bls
self.assertEqual(len(self.spans), EXPECTED_NUM_SPANS)
# We should have 5 compute spans
self.assertEqual(parsed_spans.count("compute"), 5)
# 7 request spans
# (4 named simple - same as our model name, 2 ensemble, 1 bls)
self.assertEqual(parsed_spans.count(self.simple_model_name), 4)
self.assertEqual(parsed_spans.count(self.ensemble_model_name), 2)
self.assertEqual(parsed_spans.count(self.bls_model_name), 1)
# 4 root spans
self.assertEqual(parsed_spans.count(self.root_span), 4)

def test_nested_spans(self):
# First 3 spans in `self.spans` belong to HTTP request
@@ -182,6 +189,18 @@
"parent_span_id", self.spans[9], "root span has a parent_span_id specified"
)

def test_resource_attributes(self):
test_attribute_entry = "{{'key': {k}, 'value': {{'string_value': {v}}}}}"
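# Each exported span batch should carry the resource attributes configured
# via --trace-config=opentelemetry,resource=... in test.sh
# (test.key=test.value and service.name=test_triton).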
for attribute in self.resource_attributes:
self.assertIn(
test_attribute_entry.format(k="'test.key'", v="'test.value'"),
str(attribute),
)
self.assertIn(
test_attribute_entry.format(k="'service.name'", v="'test_triton'"),
str(attribute),
)


def prepare_data(client):
inputs = []
@@ -214,6 +233,16 @@ def prepare_traces():
inputs = prepare_data(httpclient)
triton_client_http.infer("ensemble_add_sub_int32_int32_int32", inputs)

send_bls_request(model_name="ensemble_add_sub_int32_int32_int32")


def send_bls_request(model_name="simple"):
with httpclient.InferenceServerClient("localhost:8000") as client:
inputs = prepare_data(httpclient)
inputs.append(httpclient.InferInput("MODEL_NAME", [1], "BYTES"))
inputs[2].set_data_from_numpy(np.array([model_name], dtype=np.object_))
client.infer("bls_simple", inputs)


if __name__ == "__main__":
unittest.main()
45 changes: 36 additions & 9 deletions qa/L0_trace/test.sh
@@ -55,6 +55,7 @@ export CUDA_VISIBLE_DEVICES=0

DATADIR=/data/inferenceserver/${REPO_VERSION}/qa_model_repository
ENSEMBLEDIR=$DATADIR/../qa_ensemble_model_repository/qa_model_repository/
BLSDIR=../python_models/bls_simple
MODELBASE=onnx_int32_int32_int32

MODELSDIR=`pwd`/trace_models
@@ -78,7 +79,8 @@ cp -r $DATADIR/$MODELBASE $MODELSDIR/simple && \
rm -r $MODELSDIR/ensemble_add_sub_int32_int32_int32/3 && \
(cd $MODELSDIR/ensemble_add_sub_int32_int32_int32 && \
sed -i "s/^name:.*/name: \"ensemble_add_sub_int32_int32_int32\"/" config.pbtxt && \
sed -i "s/model_name:.*/model_name: \"simple\"/" config.pbtxt)
sed -i "s/model_name:.*/model_name: \"simple\"/" config.pbtxt) && \
mkdir -p $MODELSDIR/bls_simple/1 && cp $BLSDIR/bls_simple.py $MODELSDIR/bls_simple/1/model.py

RET=0

@@ -618,7 +620,7 @@ wait $SERVER_PID


# Check `--trace-config` sets arguments properly
SERVER_ARGS="--trace-config=triton,file=some_file.log --trace-config=level=TIMESTAMPS \
SERVER_ARGS="--trace-config=triton,file=bls_trace.log --trace-config=level=TIMESTAMPS \
--trace-config=rate=4 --trace-config=count=6 --trace-config=mode=triton --model-repository=$MODELSDIR"
SERVER_LOG="./inference_server_trace_config.log"
run_server
@@ -649,16 +651,41 @@ fi
if [ `grep -c "\"log_frequency\":\"0\"" ./curl.out` != "1" ]; then
RET=1
fi
if [ `grep -c "\"trace_file\":\"some_file.log\"" ./curl.out` != "1" ]; then
if [ `grep -c "\"trace_file\":\"bls_trace.log\"" ./curl.out` != "1" ]; then
RET=1
fi

set +e
# Send bls requests to make sure simple model is traced
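# With --trace-config=rate=4 (set in SERVER_ARGS above), one of these four
# requests should be sampled for tracing.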
for p in {1..4}; do
python -c 'import opentelemetry_unittest; \
opentelemetry_unittest.send_bls_request(model_name="ensemble_add_sub_int32_int32_int32")' >> client_update.log 2>&1
done

set -e

kill $SERVER_PID
wait $SERVER_PID

set +e
$TRACE_SUMMARY -t bls_trace.log > summary_bls.log
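# The checks below assume the traced BLS request appears in the summary as:
# two compute records (the BLS model and its child), one summary entry for
# `simple`, and child traces linked to their parents via `parent id` entries.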

if [ `grep -c "COMPUTE_INPUT_END" summary_bls.log` != "2" ]; then
cat summary_bls.log
echo -e "\n***\n*** Test Failed\n***"
RET=1
fi

if [ `grep -c ^simple summary_bls.log` != "1" ]; then
cat summary_bls.log
echo -e "\n***\n*** Test Failed\n***"
RET=1
fi

if [ `grep -c 'parent id' bls_trace.log` == "1" ]; then
cat summary_bls.log
echo -e "\n***\n*** Test Failed\n***"
RET=1
fi

# Check opentelemetry trace exporter sends proper info.
# A helper python script starts listening on $OTLP_PORT, where
@@ -668,18 +695,18 @@

OPENTELEMETRY_TEST=opentelemetry_unittest.py
OPENTELEMETRY_LOG="opentelemetry_unittest.log"
EXPECTED_NUM_TESTS="2"
EXPECTED_NUM_TESTS="3"

SERVER_ARGS="--trace-config=level=TIMESTAMPS --trace-config=rate=1 \
--trace-config=count=100 --trace-config=mode=opentelemetry \
--trace-config=opentelemetry,url=localhost:$OTLP_PORT \
--trace-config=opentelemetry,resource=test.key=test.value \
--trace-config=opentelemetry,resource=service.name=test_triton \
--model-repository=$MODELSDIR"
SERVER_LOG="./inference_server_trace_config.log"

# Increasing OTLP timeout, since we don't use a valid OTLP collector
# and don't send a proper signal back.
export OTEL_EXPORTER_OTLP_TIMEOUT=50000
export OTEL_EXPORTER_OTLP_TRACES_TIMEOUT=50000
export OTEL_EXPORTER_OTLP_TIMEOUT=5
export OTEL_EXPORTER_OTLP_TRACES_TIMEOUT=5

run_server
if [ "$SERVER_PID" == "0" ]; then
84 changes: 84 additions & 0 deletions qa/python_models/bls_simple/bls_simple.py
@@ -0,0 +1,84 @@
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
@staticmethod
def auto_complete_config(auto_complete_model_config):
inputs = [
{"name": "MODEL_NAME", "data_type": "TYPE_STRING", "dims": [1]},
{"name": "INPUT0", "data_type": "TYPE_INT32", "dims": [1, 16]},
{"name": "INPUT1", "data_type": "TYPE_INT32", "dims": [1, 16]},
]
outputs = [
{"name": "OUTPUT0", "data_type": "TYPE_INT32", "dims": [16]},
{"name": "OUTPUT1", "data_type": "TYPE_INT32", "dims": [16]},
]

config = auto_complete_model_config.as_dict()
input_names = []
output_names = []
for input in config["input"]:
input_names.append(input["name"])
for output in config["output"]:
output_names.append(output["name"])

for input in inputs:
if input["name"] not in input_names:
auto_complete_model_config.add_input(input)
for output in outputs:
if output["name"] not in output_names:
auto_complete_model_config.add_output(output)

auto_complete_model_config.set_max_batch_size(0)

return auto_complete_model_config

def execute(self, requests):
responses = []
for request in requests:
in_0 = pb_utils.get_input_tensor_by_name(request, "INPUT0")
in_1 = pb_utils.get_input_tensor_by_name(request, "INPUT1")
model_name = pb_utils.get_input_tensor_by_name(request, "MODEL_NAME")
model_name_string = model_name.as_numpy()[0]

infer_request = pb_utils.InferenceRequest(
model_name=model_name_string,
requested_output_names=["OUTPUT0", "OUTPUT1"],
inputs=[in_0, in_1],
trace=request.trace(),
)

infer_response = infer_request.exec()

inference_response = pb_utils.InferenceResponse(
output_tensors=infer_response.output_tensors()
)
responses.append(inference_response)

return responses
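
For context, a minimal client-side sketch for invoking this model, mirroring
`send_bls_request` from `opentelemetry_unittest.py` above (host, port, and
input values are illustrative):

```python
import numpy as np
import tritonclient.http as httpclient

with httpclient.InferenceServerClient("localhost:8000") as client:
    inputs = [
        httpclient.InferInput("INPUT0", [1, 16], "INT32"),
        httpclient.InferInput("INPUT1", [1, 16], "INT32"),
        httpclient.InferInput("MODEL_NAME", [1], "BYTES"),
    ]
    inputs[0].set_data_from_numpy(np.zeros((1, 16), dtype=np.int32))
    inputs[1].set_data_from_numpy(np.ones((1, 16), dtype=np.int32))
    # Name of the child model the BLS model should call.
    inputs[2].set_data_from_numpy(np.array(["simple"], dtype=np.object_))
    result = client.infer("bls_simple", inputs)
```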