Skip to content

Commit

Permalink
Add testing for Python backend request rescheduling (#6509)
Browse files Browse the repository at this point in the history
* Add testing

* Fix up

* Enhance testing

* Fix up

* Revert test changes

* Add grpc endpoint test

* Remove unused import

* Remove unused import

* Update qa/L0_backend_python/request_rescheduling/grpc_endpoint_test.py

Co-authored-by: Iman Tabrizian <[email protected]>

* Update qa/python_models/bls_request_rescheduling/model.py

Co-authored-by: Iman Tabrizian <[email protected]>

---------

Co-authored-by: Iman Tabrizian <[email protected]>
  • Loading branch information
krishung5 and Tabrizian committed Nov 9, 2023
1 parent d42ff6e commit e28eff9
Show file tree
Hide file tree
Showing 13 changed files with 853 additions and 13 deletions.
1 change: 1 addition & 0 deletions qa/L0_backend_python/python_unittest.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ def test_python_unittest(self):
model_name == "bls"
or model_name == "bls_memory"
or model_name == "bls_memory_async"
or model_name == "bls_request_rescheduling"
):
# For these tests, the memory region size will be grown. Because of
# this we need to use the shared memory probe only on the later
Expand Down
111 changes: 111 additions & 0 deletions qa/L0_backend_python/request_rescheduling/grpc_endpoint_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
#!/usr/bin/env python
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../../common")

# gRPC streaming helpers.
import queue
import unittest
from functools import partial

import numpy as np
import test_util as tu
import tritonclient.grpc as grpcclient
from tritonclient.utils import InferenceServerException


class UserData:
    """Per-test container handed to the stream callback.

    It owns a single queue on which the callback deposits whatever the
    stream delivers, so the test body can consume the items in order.
    """

    def __init__(self):
        # Filled by `callback`; holds either a result or an error per item.
        self._completed_requests = queue.Queue()


def callback(user_data, result, error):
    """Record the outcome of one stream response on the user's queue.

    Args:
        user_data: Object exposing a ``_completed_requests`` queue.
        result: The response delivered by the stream.
        error: The error delivered by the stream; when truthy it is queued
            in place of ``result``.
    """
    outcome = error if error else result
    user_data._completed_requests.put(outcome)


class GrpcEndpointTest(tu.TestResultCollector):
    """Exercise the request-rescheduling models through the gRPC endpoint.

    Both tests talk to a server at ``localhost:8001`` and reload the model
    under test first, which requires the server to allow explicit model
    control.
    """

    # Upper bound, in seconds, to wait for a single streamed response. Without
    # it a missing response would block the whole test run indefinitely.
    _RESPONSE_TIMEOUT_SECS = 120

    def test_grpc_decoupled(self, sequence_id=0, sequence_start=False):
        """Stream one request to ``generative_sequence`` and check 3 responses.

        The request carries ``IN = [3]``; the responses are expected to carry
        ``OUT[0]`` equal to 2, 1 and 0, in that order.

        Args:
            sequence_id: Sequence ID forwarded to ``async_stream_infer``.
            sequence_start: Sequence-start flag forwarded to
                ``async_stream_infer``.

        Raises:
            InferenceServerException: If the stream delivers an error instead
                of a result.
        """
        user_data = UserData()
        with grpcclient.InferenceServerClient("localhost:8001") as triton_client:
            # Reload the model to reset the flag
            triton_client.unload_model("generative_sequence")
            triton_client.load_model("generative_sequence")

            triton_client.start_stream(callback=partial(callback, user_data))
            infer_input = grpcclient.InferInput("IN", [1], "INT32")
            infer_input.set_data_from_numpy(np.array([3], dtype=np.int32))

            triton_client.async_stream_infer(
                model_name="generative_sequence",
                inputs=[infer_input],
                sequence_id=sequence_id,
                sequence_start=sequence_start,
            )
            res_count = 3
            while res_count > 0:
                try:
                    data_item = user_data._completed_requests.get(
                        timeout=self._RESPONSE_TIMEOUT_SECS
                    )
                except queue.Empty:
                    self.fail(
                        "Timed out waiting for a stream response; "
                        f"{res_count} response(s) still outstanding"
                    )
                res_count -= 1
                # isinstance also catches subclasses of the client exception,
                # which an exact type comparison would treat as a result.
                if isinstance(data_item, InferenceServerException):
                    raise data_item
                self.assertEqual(res_count, data_item.as_numpy("OUT")[0])
            self.assertEqual(0, res_count)

    def test_grpc_non_decoupled(self, sequence_id=0, sequence_start=False):
        """Send one request to ``request_rescheduling_addsub`` and check it.

        Two random 16-element FP32 inputs are sent; ``OUTPUT0`` must equal
        their sum and ``OUTPUT1`` their difference.

        Args:
            sequence_id: Unused; kept for signature parity with the decoupled
                test.
            sequence_start: Unused; kept for signature parity with the
                decoupled test.
        """
        with grpcclient.InferenceServerClient("localhost:8001") as triton_client:
            # Reload the model to reset the flag
            triton_client.unload_model("request_rescheduling_addsub")
            triton_client.load_model("request_rescheduling_addsub")

            input0_val = np.random.randn(16).astype(np.float32)
            input1_val = np.random.randn(16).astype(np.float32)
            inputs = [
                grpcclient.InferInput("INPUT0", [16], "FP32"),
                grpcclient.InferInput("INPUT1", [16], "FP32"),
            ]
            inputs[0].set_data_from_numpy(input0_val)
            inputs[1].set_data_from_numpy(input1_val)

            results = triton_client.infer(
                model_name="request_rescheduling_addsub",
                inputs=inputs,
            )

            output0_data = results.as_numpy("OUTPUT0")
            output1_data = results.as_numpy("OUTPUT1")

            self.assertTrue(np.array_equal(output0_data, input0_val + input1_val))
            self.assertTrue(np.array_equal(output1_data, input0_val - input1_val))


# Script entry point: hand control to unittest's command-line runner.
if __name__ == "__main__":
unittest.main()
116 changes: 116 additions & 0 deletions qa/L0_backend_python/request_rescheduling/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
#!/bin/bash
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Request rescheduling test driver: stages the models, starts the server in
# explicit model-control mode, runs the BLS unit test and the gRPC endpoint
# test against it, then reports the combined result through the exit code.

CLIENT_PY=../python_unittest.py
GRPC_TEST_PY=./grpc_endpoint_test.py
CLIENT_LOG="./request_rescheduling_client.log"
TEST_RESULT_FILE='test_results.txt'
source ../../common/util.sh

TRITON_DIR=${TRITON_DIR:="/opt/tritonserver"}
SERVER=${TRITON_DIR}/bin/tritonserver
BACKEND_DIR=${TRITON_DIR}/backends

RET=0

rm -fr *.log ./models *.txt

# Stage each model as models/<name>/config.pbtxt and models/<name>/1/model.py.
for MODEL in bls_request_rescheduling \
             request_rescheduling_addsub \
             generative_sequence \
             wrong_return_type; do
    mkdir -p "models/${MODEL}/1/"
    cp "../../python_models/${MODEL}/model.py" "models/${MODEL}/1/"
    cp "../../python_models/${MODEL}/config.pbtxt" "models/${MODEL}"
done

# Run one Python client test and verify how many tests it reported.
#   $1 - path of the Python test script
#   $2 - expected number of tests in the result file
#   $3 - label used in the failure banner
# Sets RET=1 on any failure. Leaves `set -e` enabled on return, exactly as the
# original inline sequence did.
run_rescheduling_client_test () {
    local test_py="$1"
    EXPECTED_NUM_TESTS="$2"
    local label="$3"

    set +e
    python3 "$test_py" >> "$CLIENT_LOG" 2>&1
    if [ $? -ne 0 ]; then
        echo -e "\n***\n*** ${label} test FAILED. \n***"
        cat "$CLIENT_LOG"
        RET=1
    else
        check_test_results "$TEST_RESULT_FILE" "$EXPECTED_NUM_TESTS"
        if [ $? -ne 0 ]; then
            cat "$CLIENT_LOG"
            echo -e "\n***\n*** Test Result Verification Failed\n***"
            RET=1
        fi
    fi
    set -e
}

SERVER_LOG="./request_rescheduling_server.log"
# Explicit model control is needed because the gRPC test unloads and reloads
# models; --load-model=* still loads everything at startup.
SERVER_ARGS="--model-repository=$(pwd)/models --backend-directory=${BACKEND_DIR} --model-control-mode=explicit --load-model=* --log-verbose=1"

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat "$SERVER_LOG"
    exit 1
fi

# python_unittest.py selects the model under test from this variable.
export MODEL_NAME='bls_request_rescheduling'

run_rescheduling_client_test "$CLIENT_PY" "1" "bls_request_rescheduling"
run_rescheduling_client_test "$GRPC_TEST_PY" "2" "GRPC Endpoint"

kill $SERVER_PID
wait $SERVER_PID


if [ $RET -eq 1 ]; then
    cat "$SERVER_LOG"
    echo -e "\n***\n*** Request Rescheduling test FAILED. \n***"
else
    echo -e "\n***\n*** Request Rescheduling test PASSED. \n***"
fi

exit $RET
2 changes: 1 addition & 1 deletion qa/L0_backend_python/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -423,7 +423,7 @@ if [ "$TEST_JETSON" == "0" ]; then
fi
fi

SUBTESTS="lifecycle restart model_control examples argument_validation logging custom_metrics"
SUBTESTS="lifecycle restart model_control examples argument_validation logging custom_metrics request_rescheduling"
for TEST in ${SUBTESTS}; do
# Run each subtest in a separate virtual environment to avoid conflicts
# between dependencies.
Expand Down
24 changes: 12 additions & 12 deletions qa/python_models/bls/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ def _send_bls_sequence_requests(self, correlation_id, is_decoupled):
infer_request.flags(), pb_utils.TRITONSERVER_REQUEST_FLAG_SEQUENCE_START
)
infer_response = infer_request.exec()
self.assertFalse(infer_response.has_error(), infer_response.error())
self.assertFalse(infer_response.has_error())
output = pb_utils.get_output_tensor_by_name(infer_response, "OUTPUT")
self.assertFalse(output.is_cpu())
output = from_dlpack(output.to_dlpack()).to("cpu").cpu().detach().numpy()
Expand All @@ -242,7 +242,7 @@ def _send_bls_sequence_requests(self, correlation_id, is_decoupled):
next(infer_responses)
else:
infer_response = infer_request.exec()
self.assertFalse(infer_response.has_error(), infer_response.error())
self.assertFalse(infer_response.has_error())

# The new output is the previous output + the current input
expected_output = output[0] + i
Expand Down Expand Up @@ -275,7 +275,7 @@ def _send_bls_sequence_requests(self, correlation_id, is_decoupled):
else:
infer_response = infer_request.exec()

self.assertFalse(infer_response.has_error(), infer_response.error())
self.assertFalse(infer_response.has_error())
expected_output = output[0] + input.as_numpy()[0]
output = pb_utils.get_output_tensor_by_name(infer_response, "OUTPUT")
self.assertFalse(output.is_cpu())
Expand Down Expand Up @@ -345,7 +345,7 @@ def _get_gpu_bls_outputs(self, input0_pb, input1_pb, is_decoupled):
else:
infer_response = infer_request.exec()

self.assertFalse(infer_response.has_error(), infer_response.error())
self.assertFalse(infer_response.has_error())

output0 = pb_utils.get_output_tensor_by_name(infer_response, "OUTPUT0")
output1 = pb_utils.get_output_tensor_by_name(infer_response, "OUTPUT1")
Expand Down Expand Up @@ -401,7 +401,7 @@ def test_zero_length_io(self):
else:
infer_response = infer_request.exec()

self.assertFalse(infer_response.has_error(), infer_response.error())
self.assertFalse(infer_response.has_error())

output0 = pb_utils.get_output_tensor_by_name(infer_response, "OUTPUT0")
self.assertTrue(np.all(output0 == input0))
Expand Down Expand Up @@ -439,7 +439,7 @@ def bls_tensor_lifecycle_helper(self):
next(infer_responses)
else:
infer_response = infer_request.exec()
self.assertFalse(infer_response.has_error(), infer_response.error())
self.assertFalse(infer_response.has_error())

output0 = pb_utils.get_output_tensor_by_name(infer_response, "OUTPUT0")
np.testing.assert_equal(
Expand Down Expand Up @@ -497,7 +497,7 @@ def bls_tensor_lifecycle_helper(self):
else:
infer_response = infer_request.exec()

self.assertFalse(infer_response.has_error(), infer_response.error())
self.assertFalse(infer_response.has_error())

output0 = pb_utils.get_output_tensor_by_name(infer_response, "OUTPUT0")
output0_pytorch = from_dlpack(output0.to_dlpack())
Expand Down Expand Up @@ -677,7 +677,7 @@ def _test_response_iterator_square(
expected_output_cnt = np.array([expected_output_cnt], dtype=np.int32)

for infer_response in response_iterator:
self.assertFalse(infer_response.has_error(), infer_response.error())
self.assertFalse(infer_response.has_error())
if len(infer_response.output_tensors()) > 0:
output0 = pb_utils.get_output_tensor_by_name(infer_response, "OUT")
self.assertIsNotNone(output0)
Expand Down Expand Up @@ -710,7 +710,7 @@ def test_response_iterator(self):
# case 1. Use Next() to get the next response first, then use
# for-loop to get the remaining responses.
infer_response = next(infer_responses)
self.assertFalse(infer_response.has_error(), infer_response.error())
self.assertFalse(infer_response.has_error())
output0 = pb_utils.get_output_tensor_by_name(infer_response, "OUT")
self.assertIsNotNone(output0)
self.assertEqual(response_value, output0.as_numpy())
Expand All @@ -734,7 +734,7 @@ def test_response_iterator(self):
# get the remaining responses.
response_count = 0
for infer_response in infer_responses:
self.assertFalse(infer_response.has_error(), infer_response.error())
self.assertFalse(infer_response.has_error())
output0 = pb_utils.get_output_tensor_by_name(infer_response, "OUT")
self.assertIsNotNone(output0)
self.assertEqual(response_value, output0.as_numpy())
Expand All @@ -744,7 +744,7 @@ def test_response_iterator(self):
break

infer_response = next(infer_responses)
self.assertFalse(infer_response.has_error(), infer_response.error())
self.assertFalse(infer_response.has_error())
output0 = pb_utils.get_output_tensor_by_name(infer_response, "OUT")
self.assertIsNotNone(output0)
self.assertEqual(response_value, output0.as_numpy())
Expand All @@ -759,7 +759,7 @@ def test_response_iterator(self):
infer_responses = infer_request.exec(decoupled=True)

infer_response = next(infer_responses)
self.assertFalse(infer_response.has_error(), infer_response.error())
self.assertFalse(infer_response.has_error())
output0 = pb_utils.get_output_tensor_by_name(infer_response, "OUT")
self.assertIsNotNone(output0)
self.assertEqual(response_value, output0.as_numpy())
Expand Down
38 changes: 38 additions & 0 deletions qa/python_models/bls_request_rescheduling/config.pbtxt
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Python-backend model exercised by the request rescheduling test.
name: "bls_request_rescheduling"
backend: "python"

# Single output: a 16-element FP32 tensor. No input tensors are declared.
output [
{
name: "OUTPUT0"
data_type: TYPE_FP32
dims: [ 16 ]
}
]

# Run the model instance on CPU.
instance_group [{ kind: KIND_CPU }]
Loading

0 comments on commit e28eff9

Please sign in to comment.