Add testing for Python backend request rescheduling #6509

Merged
merged 10 commits on Nov 9, 2023
Changes from 8 commits
1 change: 1 addition & 0 deletions qa/L0_backend_python/python_unittest.py
@@ -68,6 +68,7 @@ def test_python_unittest(self):
model_name == "bls"
or model_name == "bls_memory"
or model_name == "bls_memory_async"
or model_name == "bls_request_rescheduling"
):
# For these tests, the memory region size will be grown. Because of
# this we need to use the shared memory probe only on the later
111 changes: 111 additions & 0 deletions qa/L0_backend_python/request_rescheduling/grpc_endpoint_test.py
@@ -0,0 +1,111 @@
#!/usr/bin/env python
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys

sys.path.append("../../common")

# GRPC streaming helpers.
import queue
import unittest
from functools import partial

import numpy as np
import test_util as tu
import tritonclient.grpc as grpcclient
from tritonclient.utils import InferenceServerException


class UserData:
    def __init__(self):
        self._completed_requests = queue.Queue()


def callback(user_data, result, error):
    if error:
        user_data._completed_requests.put(error)
    else:
        user_data._completed_requests.put(result)


class GRPCENDPOINTTest(tu.TestResultCollector):
    def test_grpc_decoupled(self, sequence_id=0, sequence_start=False):
        user_data = UserData()
        with grpcclient.InferenceServerClient("localhost:8001") as triton_client:
            # Reload the model to reset the flag
            triton_client.unload_model("generative_sequence")
            triton_client.load_model("generative_sequence")

            triton_client.start_stream(callback=partial(callback, user_data))
            inputs = []
            inputs.append(grpcclient.InferInput("IN", [1], "INT32"))
            inputs[0].set_data_from_numpy(np.array([3], dtype=np.int32))

            triton_client.async_stream_infer(
                model_name="generative_sequence",
                inputs=inputs,
                sequence_id=sequence_id,
                sequence_start=sequence_start,
            )
            res_count = 3
            while res_count > 0:
                data_item = user_data._completed_requests.get()
                res_count -= 1
                if type(data_item) == InferenceServerException:
                    raise data_item
                else:
                    self.assertEqual(res_count, data_item.as_numpy("OUT")[0])
            self.assertEqual(0, res_count)

    def test_grpc_non_decoupled(self, sequence_id=0, sequence_start=False):
        with grpcclient.InferenceServerClient("localhost:8001") as triton_client:
            # Reload the model to reset the flag
            triton_client.unload_model("request_rescheduling_addsub")
            triton_client.load_model("request_rescheduling_addsub")

            inputs = []
            inputs.append(grpcclient.InferInput("INPUT0", [16], "FP32"))
            inputs.append(grpcclient.InferInput("INPUT1", [16], "FP32"))
            input0_val = np.random.randn(*[16]).astype(np.float32)
            input1_val = np.random.randn(*[16]).astype(np.float32)
            inputs[0].set_data_from_numpy(input0_val)
            inputs[1].set_data_from_numpy(input1_val)

            results = triton_client.infer(
                model_name="request_rescheduling_addsub",
                inputs=inputs,
            )

            output0_data = results.as_numpy("OUTPUT0")
            output1_data = results.as_numpy("OUTPUT1")

            self.assertTrue(np.array_equal(output0_data, input0_val + input1_val))
            self.assertTrue(np.array_equal(output1_data, input0_val - input1_val))


if __name__ == "__main__":
    unittest.main()
116 changes: 116 additions & 0 deletions qa/L0_backend_python/request_rescheduling/test.sh
@@ -0,0 +1,116 @@
#!/bin/bash
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

CLIENT_PY=../python_unittest.py
CLIENT_LOG="./request_rescheduling_client.log"
EXPECTED_NUM_TESTS="1"
TEST_RESULT_FILE='test_results.txt'
source ../../common/util.sh

TRITON_DIR=${TRITON_DIR:="/opt/tritonserver"}
SERVER=${TRITON_DIR}/bin/tritonserver
BACKEND_DIR=${TRITON_DIR}/backends

RET=0

rm -fr *.log ./models *.txt

mkdir -p models/bls_request_rescheduling/1/
cp ../../python_models/bls_request_rescheduling/model.py models/bls_request_rescheduling/1/
cp ../../python_models/bls_request_rescheduling/config.pbtxt models/bls_request_rescheduling

mkdir -p models/request_rescheduling_addsub/1/
cp ../../python_models/request_rescheduling_addsub/model.py models/request_rescheduling_addsub/1/
cp ../../python_models/request_rescheduling_addsub/config.pbtxt models/request_rescheduling_addsub

mkdir -p models/generative_sequence/1/
cp ../../python_models/generative_sequence/model.py models/generative_sequence/1/
cp ../../python_models/generative_sequence/config.pbtxt models/generative_sequence

mkdir -p models/wrong_return_type/1/
cp ../../python_models/wrong_return_type/model.py models/wrong_return_type/1/
cp ../../python_models/wrong_return_type/config.pbtxt models/wrong_return_type

SERVER_LOG="./request_rescheduling_server.log"
SERVER_ARGS="--model-repository=`pwd`/models --backend-directory=${BACKEND_DIR} --model-control-mode=explicit --load-model=* --log-verbose=1"

run_server
if [ "$SERVER_PID" == "0" ]; then
echo -e "\n***\n*** Failed to start $SERVER\n***"
cat $SERVER_LOG
exit 1
fi

export MODEL_NAME='bls_request_rescheduling'

set +e
python3 $CLIENT_PY >> $CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** bls_request_rescheduling test FAILED. \n***"
    cat $CLIENT_LOG
    RET=1
else
    check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
fi
set -e

GRPC_TEST_PY=./grpc_endpoint_test.py
EXPECTED_NUM_TESTS="2"

set +e
python3 $GRPC_TEST_PY >> $CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    echo -e "\n***\n*** GRPC Endpoint test FAILED. \n***"
    cat $CLIENT_LOG
    RET=1
else
    check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
    if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
    fi
fi
set -e

kill $SERVER_PID
wait $SERVER_PID


if [ $RET -eq 1 ]; then
    cat $SERVER_LOG
    echo -e "\n***\n*** Request Rescheduling test FAILED. \n***"
else
    echo -e "\n***\n*** Request Rescheduling test PASSED. \n***"
fi

exit $RET
2 changes: 1 addition & 1 deletion qa/L0_backend_python/test.sh
@@ -423,7 +423,7 @@ if [ "$TEST_JETSON" == "0" ]; then
fi
fi

SUBTESTS="lifecycle restart model_control examples argument_validation logging custom_metrics"
SUBTESTS="lifecycle restart model_control examples argument_validation logging custom_metrics request_rescheduling"
for TEST in ${SUBTESTS}; do
# Run each subtest in a separate virtual environment to avoid conflicts
# between dependencies.
24 changes: 12 additions & 12 deletions qa/python_models/bls/model.py
@@ -220,7 +220,7 @@ def _send_bls_sequence_requests(self, correlation_id, is_decoupled):
infer_request.flags(), pb_utils.TRITONSERVER_REQUEST_FLAG_SEQUENCE_START
)
infer_response = infer_request.exec()
self.assertFalse(infer_response.has_error(), infer_response.error())
self.assertFalse(infer_response.has_error())
output = pb_utils.get_output_tensor_by_name(infer_response, "OUTPUT")
self.assertFalse(output.is_cpu())
output = from_dlpack(output.to_dlpack()).to("cpu").cpu().detach().numpy()
@@ -242,7 +242,7 @@ def _send_bls_sequence_requests(self, correlation_id, is_decoupled):
next(infer_responses)
else:
infer_response = infer_request.exec()
self.assertFalse(infer_response.has_error(), infer_response.error())
self.assertFalse(infer_response.has_error())

# The new output is the previous output + the current input
expected_output = output[0] + i
@@ -275,7 +275,7 @@ def _send_bls_sequence_requests(self, correlation_id, is_decoupled):
else:
infer_response = infer_request.exec()

self.assertFalse(infer_response.has_error(), infer_response.error())
self.assertFalse(infer_response.has_error())
Member

I think having the error string would make it easier to debug in case the test fails

Contributor Author

The correct call to print the error string would be infer_response.error().message(). However, calling infer_response.error().message() here throws an error when infer_response carries no error object, so these second arguments are removed.
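
A guarded variant could keep the error message in the failure report without hitting that crash. A minimal sketch, assuming only the has_error()/error().message() behavior described above; the assert_no_error helper is hypothetical and not part of this PR:

# Hypothetical helper (sketch only, not part of this PR): build the message
# only when the response actually carries an error, so a successful response
# never dereferences a missing error object.
def assert_no_error(test_case, infer_response):
    error_msg = None
    if infer_response.has_error():
        # Safe here: has_error() is True, so error() returns an error object.
        error_msg = infer_response.error().message()
    test_case.assertFalse(infer_response.has_error(), error_msg)

Each call site would then read assert_no_error(self, infer_response) instead of the bare assertFalse.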

expected_output = output[0] + input.as_numpy()[0]
output = pb_utils.get_output_tensor_by_name(infer_response, "OUTPUT")
self.assertFalse(output.is_cpu())
@@ -345,7 +345,7 @@ def _get_gpu_bls_outputs(self, input0_pb, input1_pb, is_decoupled):
else:
infer_response = infer_request.exec()

self.assertFalse(infer_response.has_error(), infer_response.error())
self.assertFalse(infer_response.has_error())

output0 = pb_utils.get_output_tensor_by_name(infer_response, "OUTPUT0")
output1 = pb_utils.get_output_tensor_by_name(infer_response, "OUTPUT1")
@@ -401,7 +401,7 @@ def test_zero_length_io(self):
else:
infer_response = infer_request.exec()

self.assertFalse(infer_response.has_error(), infer_response.error())
self.assertFalse(infer_response.has_error())

output0 = pb_utils.get_output_tensor_by_name(infer_response, "OUTPUT0")
self.assertTrue(np.all(output0 == input0))
@@ -439,7 +439,7 @@ def bls_tensor_lifecycle_helper(self):
next(infer_responses)
else:
infer_response = infer_request.exec()
self.assertFalse(infer_response.has_error(), infer_response.error())
self.assertFalse(infer_response.has_error())

output0 = pb_utils.get_output_tensor_by_name(infer_response, "OUTPUT0")
np.testing.assert_equal(
@@ -497,7 +497,7 @@ def bls_tensor_lifecycle_helper(self):
else:
infer_response = infer_request.exec()

self.assertFalse(infer_response.has_error(), infer_response.error())
self.assertFalse(infer_response.has_error())

output0 = pb_utils.get_output_tensor_by_name(infer_response, "OUTPUT0")
output0_pytorch = from_dlpack(output0.to_dlpack())
@@ -677,7 +677,7 @@ def _test_response_iterator_square(
expected_output_cnt = np.array([expected_output_cnt], dtype=np.int32)

for infer_response in response_iterator:
self.assertFalse(infer_response.has_error(), infer_response.error())
self.assertFalse(infer_response.has_error())
if len(infer_response.output_tensors()) > 0:
output0 = pb_utils.get_output_tensor_by_name(infer_response, "OUT")
self.assertIsNotNone(output0)
@@ -710,7 +710,7 @@ def test_response_iterator(self):
# case 1. Use Next() to get the next response first, then use
# for-loop to get the remaining responses.
infer_response = next(infer_responses)
self.assertFalse(infer_response.has_error(), infer_response.error())
self.assertFalse(infer_response.has_error())
output0 = pb_utils.get_output_tensor_by_name(infer_response, "OUT")
self.assertIsNotNone(output0)
self.assertEqual(response_value, output0.as_numpy())
@@ -734,7 +734,7 @@ def test_response_iterator(self):
# get the remaining responses.
response_count = 0
for infer_response in infer_responses:
self.assertFalse(infer_response.has_error(), infer_response.error())
self.assertFalse(infer_response.has_error())
output0 = pb_utils.get_output_tensor_by_name(infer_response, "OUT")
self.assertIsNotNone(output0)
self.assertEqual(response_value, output0.as_numpy())
@@ -744,7 +744,7 @@ def test_response_iterator(self):
break

infer_response = next(infer_responses)
self.assertFalse(infer_response.has_error(), infer_response.error())
self.assertFalse(infer_response.has_error())
output0 = pb_utils.get_output_tensor_by_name(infer_response, "OUT")
self.assertIsNotNone(output0)
self.assertEqual(response_value, output0.as_numpy())
@@ -759,7 +759,7 @@ def test_response_iterator(self):
infer_responses = infer_request.exec(decoupled=True)

infer_response = next(infer_responses)
self.assertFalse(infer_response.has_error(), infer_response.error())
self.assertFalse(infer_response.has_error())
output0 = pb_utils.get_output_tensor_by_name(infer_response, "OUT")
self.assertIsNotNone(output0)
self.assertEqual(response_value, output0.as_numpy())
38 changes: 38 additions & 0 deletions qa/python_models/bls_request_rescheduling/config.pbtxt
@@ -0,0 +1,38 @@
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

name: "bls_request_rescheduling"
backend: "python"

output [
  {
    name: "OUTPUT0"
    data_type: TYPE_FP32
    dims: [ 16 ]
  }
]

instance_group [{ kind: KIND_CPU }]