Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding the support tracing of child models invoked from a BLS model #6063

Merged
merged 15 commits into from
Aug 7, 2023
Merged
29 changes: 27 additions & 2 deletions qa/L0_trace/opentelemetry_unittest.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import sys

sys.path.append("../common")
import json
import re
import unittest

Expand All @@ -40,29 +41,54 @@

class OpenTelemetryTest(tu.TestResultCollector):
def setUp(self):
# Extracted spans are in json-like format, thus data needs to be
# post-processed, so that `json` could accept it for further
# processing
with open("trace_collector.log", "rt") as f:
rmccorm4 marked this conversation as resolved.
Show resolved Hide resolved
data = f.read()
# Removing new lines and tabs around `{`
json_string = re.sub("\n\t{\n\t", "{", data)
# `resources` field is a dictionary, so adding `{` and `}`
# in the next 2 transformations. `instr-lib` is the next field,
# so whatever goes before it belongs to `resources`.
json_string = re.sub(
"resources : \n\t", "resources : {\n\t", json_string
)
json_string = re.sub(
"\n instr-lib :", "}\n instr-lib :", json_string
)
# `json` expects "key":"value" format, some fields in the
# data have empty string as value, so need to add `"",`
json_string = re.sub(": \n\t", ':"",', json_string)
json_string = re.sub(": \n", ':"",', json_string)
# Extracted data is missing `,` after each key-value pair,
# which `json` expects
json_string = re.sub("\n|\n\t", ",", json_string)
# Removing tabs
json_string = re.sub("\t", "", json_string)
# `json` expects each key and value have `"`'s, so adding them to
# every word/number/alpha-numeric entry
json_string = re.sub(r"\b([\w.-]+)\b", r'"\1"', json_string)
# `span kind` represents one key
json_string = re.sub('"span" "kind"', '"span kind"', json_string)
# Removing extra `,`
json_string = re.sub("{,", "{", json_string)
json_string = re.sub(",}", "}", json_string)
# Adding `,` between dictionary entries
json_string = re.sub("}{", "},{", json_string)
# `events` is a list of dictionaries, `json` will accept it in the
# form of "events" : [{....}, {.....}, ...]
json_string = re.sub(
'"events" : {', '"events" : [{', json_string
)
# Closing `events`' list of dictionaries
json_string = re.sub('}, "links"', '}], "links"', json_string)
self.spans = eval(json_string[:-1])
# Last 2 symbols are not needed
json_string = json_string[:-2]
# Since now `json_string` is a string, which represents dictionaries,
# we put it into one dictionary, so that `json` could read it as one.
json_string = '{ "spans" :[' + json_string + "] }"
self.spans = json.loads(json_string)["spans"]

self.simple_model_name = "simple"
self.ensemble_model_name = "ensemble_add_sub_int32_int32_int32"
Expand Down Expand Up @@ -187,7 +213,6 @@ def test_nested_spans(self):
# request (bls)
children = self.spans[10:]
parents = (self.spans[11:13], self.spans[14], self.spans[14:])
print(parents)
for child, parent in zip(children, parents[0]):
self._check_parent(child, parent)

Expand Down
53 changes: 33 additions & 20 deletions qa/L0_trace/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,6 @@ CLIENT_LOG="client.log"
TEST_RESULT_FILE="test_results.txt"
EXPECTED_NUM_TESTS="6"

TRACE_COLLECTOR=trace_collector.py
TRACE_COLLECTOR_LOG="trace_collector.log"
OTLP_PORT=10000

REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
if [ "$#" -ge 1 ]; then
REPO_VERSION=$1
Expand Down Expand Up @@ -667,6 +663,8 @@ set -e
kill $SERVER_PID
wait $SERVER_PID

set +e

$TRACE_SUMMARY -t bls_trace.log > summary_bls.log

if [ `grep -c "COMPUTE_INPUT_END" summary_bls.log` != "2" ]; then
Expand All @@ -690,17 +688,28 @@ fi
# Check opentelemetry trace exporter sends proper info.
# A helper python script starts listening on $OTLP_PORT, where
# OTLP exporter sends traces.
TRITON_OPENTELEMETRY_TEST='false'
export TRITON_OPENTELEMETRY_TEST='false'
OTLP_PORT=10000
OTEL_COLLECTOR_DIR=./opentelemetry-collector
OTEL_COLLECTOR=./opentelemetry-collector/bin/otelcorecol_*
OTEL_COLLECTOR_LOG="./trace_collector_http_exporter.log"

# Building the latest version of the OpenTelemetry collector.
# Ref: https://opentelemetry.io/docs/collector/getting-started/#local
if [ -d "$OTEL_COLLECTOR_DIR" ]; then rm -Rf $OTEL_COLLECTOR_DIR; fi
git clone https://github.com/open-telemetry/opentelemetry-collector.git
cd $OTEL_COLLECTOR_DIR
make install-tools
make otelcorecol
cd ..
$OTEL_COLLECTOR --config ./trace-config.yaml >> $OTEL_COLLECTOR_LOG 2>&1 & COLLECTOR_PID=$!

# Using netcat as trace collector
apt-get update && apt-get install -y netcat
timeout 2m nc -l -k 127.0.0.1 $OTLP_PORT >> trace_collector_http_exporter.log 2>&1 & COLLECTOR_PID=$!

SERVER_ARGS="--trace-config=level=TIMESTAMPS --trace-config=rate=1 \
--trace-config=count=100 --trace-config=mode=opentelemetry \
--trace-config=opentelemetry,url=localhost:$OTLP_PORT \
--trace-config=opentelemetry,url=localhost:$OTLP_PORT/v1/traces \
--model-repository=$MODELSDIR"
SERVER_LOG="./inference_server_trace_config.log"
SERVER_LOG="./inference_server_otel_http_exporter.log"

run_server
if [ "$SERVER_PID" == "0" ]; then
Expand All @@ -709,21 +718,21 @@ if [ "$SERVER_PID" == "0" ]; then
exit 1
fi

$SIMPLE_HTTP_CLIENT >> client_update.log 2>&1

set +e

wait $COLLECTOR_PID
$SIMPLE_HTTP_CLIENT >>$CLIENT_LOG 2>&1

set -e

kill $SERVER_PID
wait $SERVER_PID

kill $COLLECTOR_PID
wait $COLLECTOR_PID

set +e

if ! [ -s trace_collector_http_exporter.log ] && [ `grep -c 'Host: localhost:10000' trace_collector_http_exporter.log` != 3 ] ; then
if ! [[ -s $OTEL_COLLECTOR_LOG && `grep -c 'InstrumentationScope triton-server' $OTEL_COLLECTOR_LOG` == 3 ]] ; then
echo -e "\n***\n*** HTTP exporter test failed.\n***"
cat $OTEL_COLLECTOR_LOG
exit 1
fi

Expand All @@ -733,15 +742,14 @@ OPENTELEMETRY_TEST=opentelemetry_unittest.py
OPENTELEMETRY_LOG="opentelemetry_unittest.log"
EXPECTED_NUM_TESTS="3"

TRITON_OPENTELEMETRY_TEST='true'
export TRITON_OPENTELEMETRY_TEST='true'

SERVER_ARGS="--trace-config=level=TIMESTAMPS --trace-config=rate=1 \
--trace-config=count=100 --trace-config=mode=opentelemetry \
--trace-config=opentelemetry,url=localhost:$OTLP_PORT \
--trace-config=opentelemetry,resource=test.key=test.value \
--trace-config=opentelemetry,resource=service.name=test_triton \
--model-repository=$MODELSDIR"
SERVER_LOG="./inference_server_trace_config.log"
SERVER_LOG="./inference_server_otel_ostream_exporter.log"

run_server
if [ "$SERVER_PID" == "0" ]; then
Expand All @@ -767,7 +775,12 @@ wait $SERVER_PID

set +e

grep -z -o -P '({\n(?s).*}\n)' inference_server_trace_config.log >> trace_collector.log
grep -z -o -P '({\n(?s).*}\n)' $SERVER_LOG >> trace_collector.log

if ! [ -s trace_collector.log ] ; then
echo -e "\n***\n*** $SERVER_LOG did not contain any OpenTelemetry spans.\n***"
exit 1
fi

# Unittest will not start until expected number of spans is collected.
python $OPENTELEMETRY_TEST >>$OPENTELEMETRY_LOG 2>&1
Expand Down
45 changes: 45 additions & 0 deletions qa/L0_trace/trace-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Simple config file for the OpenTelemetry collector.
# It receives all traces sent to localhost:10000 and prints
# them to the output stream.
# Ref: https://opentelemetry.io/docs/collector/configuration/
receivers:
otlp:
protocols:
http:
endpoint: 0.0.0.0:10000

exporters:
logging:
verbosity: detailed

service:
pipelines:
traces:
receivers: [otlp]
exporters: [logging]
rmccorm4 marked this conversation as resolved.
Show resolved Hide resolved