Skip to content

Commit

Permalink
Revised PR, added ensemble test
Browse files Browse the repository at this point in the history
  • Loading branch information
oandreeva-nv committed Jul 6, 2023
1 parent 1cbc24a commit 71eeb29
Show file tree
Hide file tree
Showing 6 changed files with 212 additions and 210 deletions.
1 change: 1 addition & 0 deletions docs/user_guide/trace.md
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,7 @@ The meaning of the trace timestamps is:
## OpenTelemetry trace support

Triton provides an option to generate and export traces
for standalone and ensemble models
using [OpenTelemetry APIs and SDKs](https://opentelemetry.io/).

To specify OpenTelemetry mode for tracing, specify the `--trace-config`
Expand Down
89 changes: 71 additions & 18 deletions qa/L0_trace/opentelemetry_unittest.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,28 +27,38 @@
import sys

sys.path.append("../common")

import json
import unittest

import tritonclient.http as httpclient
import tritonclient.grpc as grpcclient
import numpy as np
import test_util as tu
import time

EXPECTED_NUM_SPANS = 10

class OpenTelemetryTest(tu.TestResultCollector):

def setUp(self):
with open('trace_collector.log', 'rt') as f:
data = f.read()

while True:
with open('trace_collector.log', 'rt') as f:
data = f.read()
if data.count("resource_spans") != EXPECTED_NUM_SPANS:
time.sleep(5)
continue
else:
break

data = data.split('\n')
full_spans = [entry for entry in data if "resource_spans" in entry]
full_spans = [entry.split('POST')[0] for entry in data if "resource_spans" in entry]
self.spans = []
for span in full_spans:
span = json.loads(span)
self.spans.append(
span["resource_spans"][0]['scope_spans'][0]['spans'][0])

self.model_name = "simple"
self.simple_model_name = "simple"
self.ensemble_model_name = "ensemble_add_sub_int32_int32_int32"
self.root_span = "InferRequest"

def _check_events(self, span_name, events):
Expand Down Expand Up @@ -102,7 +112,7 @@ def _check_events(self, span_name, events):
self.assertFalse(
all(entry in events for entry in compute_events))

elif span_name == self.model_name:
elif span_name == self.simple_model_name:
# Check that all request related events (and only them)
# are recorded in request span
self.assertTrue(all(entry in events for entry in request_events))
Expand Down Expand Up @@ -131,14 +141,15 @@ def test_spans(self):
parsed_spans.append(span_name)

# There should be 6 spans in total:
# 3 for http request and 3 for grpc request.
self.assertEqual(len(self.spans), 6)
# We should have 2 compute spans
self.assertEqual(parsed_spans.count("compute"), 2)
# 2 request spans (named simple - same as our model name)
self.assertEqual(parsed_spans.count(self.model_name), 2)
# 2 root spans
self.assertEqual(parsed_spans.count(self.root_span), 2)
# 3 for http request, 3 for grpc request, 4 for ensemble
self.assertEqual(len(self.spans), 10)
# We should have 3 compute spans
self.assertEqual(parsed_spans.count("compute"), 3)
# 4 request spans (3 named simple - same as our model name, 1 ensemble)
self.assertEqual(parsed_spans.count(self.simple_model_name), 3)
self.assertEqual(parsed_spans.count(self.ensemble_model_name), 1)
# 3 root spans
self.assertEqual(parsed_spans.count(self.root_span), 3)

def test_nested_spans(self):

Expand All @@ -156,9 +167,9 @@ def test_nested_spans(self):
self.spans[2],
"root span has a parent_span_id specified")

# Last 3 spans in `self.spans` belong to GRPC request
# Next 3 spans in `self.spans` belong to GRPC request
# Order of spans and their relationship described earlier
for child, parent in zip(self.spans[3:], self.spans[4:]):
for child, parent in zip(self.spans[3:6], self.spans[4:6]):
self._check_parent(child, parent)

# root_span should not have `parent_span_id` field
Expand All @@ -167,6 +178,48 @@ def test_nested_spans(self):
self.spans[5],
"root span has a parent_span_id specified")

# Final 4 spans in `self.spans` belong to ensemble request
# Order of spans: compute span - request span - request span - root span
for child, parent in zip(self.spans[6:10], self.spans[7:10]):
self._check_parent(child, parent)

# root_span should not have `parent_span_id` field
self.assertNotIn(
'parent_span_id',
self.spans[9],
"root span has a parent_span_id specified")

def prepare_data(client):

inputs = []
outputs = []
input0_data = np.full(shape=(1, 16), fill_value=-1, dtype=np.int32)
input1_data = np.full(shape=(1, 16), fill_value=-1, dtype=np.int32)

inputs.append(client.InferInput('INPUT0', [1, 16], "INT32"))
inputs.append(client.InferInput('INPUT1', [1, 16], "INT32"))

# Initialize the data
inputs[0].set_data_from_numpy(input0_data)
inputs[1].set_data_from_numpy(input1_data)

return inputs

def prepare_traces():

triton_client_http = httpclient.InferenceServerClient("localhost:8000",
verbose=True)
triton_client_grpc = grpcclient.InferenceServerClient("localhost:8001",
verbose=True)
inputs = prepare_data(httpclient)
triton_client_http.infer("simple",inputs)

inputs = prepare_data(grpcclient)
triton_client_grpc.infer("simple", inputs)

inputs = prepare_data(httpclient)
triton_client_http.infer("ensemble_add_sub_int32_int32_int32", inputs)


if __name__ == '__main__':
unittest.main()
58 changes: 23 additions & 35 deletions qa/L0_trace/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -66,14 +66,19 @@ rm -f *.log
rm -fr $MODELSDIR && mkdir -p $MODELSDIR

# set up simple and global_simple model using MODELBASE
rm -fr $MODELSDIR && mkdir -p $MODELSDIR && \
cp -r $DATADIR/$MODELBASE $MODELSDIR/simple && \
cp -r $DATADIR/$MODELBASE $MODELSDIR/simple && \
rm -r $MODELSDIR/simple/2 && rm -r $MODELSDIR/simple/3 && \
(cd $MODELSDIR/simple && \
sed -i "s/^name:.*/name: \"simple\"/" config.pbtxt) && \
cp -r $MODELSDIR/simple $MODELSDIR/global_simple && \
(cd $MODELSDIR/global_simple && \
sed -i "s/^name:.*/name: \"global_simple\"/" config.pbtxt) && \
cp -r $ENSEMBLEDIR/simple_onnx_int32_int32_int32 $MODELSDIR/ensemble_add_sub_int32_int32_int32 && \
rm -r $MODELSDIR/ensemble_add_sub_int32_int32_int32/2 && \
rm -r $MODELSDIR/ensemble_add_sub_int32_int32_int32/3 && \
(cd $MODELSDIR/ensemble_add_sub_int32_int32_int32 && \
sed -i "s/^name:.*/name: \"ensemble_add_sub_int32_int32_int32\"/" config.pbtxt && \
sed -i "s/model_name:.*/model_name: \"simple\"/" config.pbtxt)

RET=0

Expand Down Expand Up @@ -661,11 +666,12 @@ wait $SERVER_PID

OPENTELEMETRY_TEST=opentelemetry_unittest.py
OPENTELEMETRY_LOG="opentelemetry_unittest.log"
TEST_RESULT_FILE="opentelemetry_results.txt"
EXPECTED_NUM_TESTS="2"

SERVER_ARGS="--trace-config=triton,file=some_file.log --trace-config=level=TIMESTAMPS \
--trace-config=rate=1 --trace-config=count=6 --trace-config=mode=opentelemetry --trace-config=opentelemetry,url=localhost:$OTLP_PORT --model-repository=$MODELSDIR"
SERVER_ARGS="--trace-config=level=TIMESTAMPS --trace-config=rate=1 \
--trace-config=count=100 --trace-config=mode=opentelemetry \
--trace-config=opentelemetry,url=localhost:$OTLP_PORT \
--model-repository=$MODELSDIR"
SERVER_LOG="./inference_server_trace_config.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
Expand All @@ -674,37 +680,18 @@ if [ "$SERVER_PID" == "0" ]; then
exit 1
fi

# This is a simple python code that opens port
python $TRACE_COLLECTOR $OTLP_PORT $TRACE_COLLECTOR_LOG &
COLLECTOR_PID=$!

set +e

# To make sure receiver is ready
sleep 3

# Send http request and collect trace
$SIMPLE_HTTP_CLIENT >> client_update.log 2>&1
if [ $? -ne 0 ]; then
cat client_update.log
RET=1
fi

# Send grpc request and collect trace
$SIMPLE_GRPC_CLIENT >> client_update.log 2>&1
if [ $? -ne 0 ]; then
cat client_update.log
RET=1
fi
# To make sure log gets all data
sleep 3

kill $COLLECTOR_PID
wait $COLLECTOR_PID
# Using netcat as trace collector
apt-get update && apt-get install -y netcat
nc -l -k 127.0.0.1 $OTLP_PORT >> $TRACE_COLLECTOR_LOG 2>&1 & COLLECTOR_PID=$!

# Preparing traces for unittest.
# Note: need to run this separately, to speed up trace collection.
# Otherwise internal (opentelemetry_unittest.OpenTelemetryTest.setUp) check
# will slow down collection.
python -c 'import opentelemetry_unittest; opentelemetry_unittest.prepare_traces()' >>$CLIENT_LOG 2>&1

set +e

# Unittest will not start untill expected number of spans is collected.
python $OPENTELEMETRY_TEST >>$OPENTELEMETRY_LOG 2>&1
if [ $? -ne 0 ]; then
cat $OPENTELEMETRY_LOG
Expand All @@ -718,11 +705,12 @@ else
fi
fi

kill $COLLECTOR_PID
wait $COLLECTOR_PID

set -e

kill $SERVER_PID
wait $SERVER_PID

set +e

exit $RET
66 changes: 0 additions & 66 deletions qa/L0_trace/trace_collector.py

This file was deleted.

Loading

0 comments on commit 71eeb29

Please sign in to comment.