diff --git a/qa/L0_backend_python/lifecycle/lifecycle_test.py b/qa/L0_backend_python/lifecycle/lifecycle_test.py index d6eb2a8f53..3874ef428e 100755 --- a/qa/L0_backend_python/lifecycle/lifecycle_test.py +++ b/qa/L0_backend_python/lifecycle/lifecycle_test.py @@ -68,7 +68,7 @@ def setUp(self): self._shm_leak_detector = shm_util.ShmLeakDetector() def _get_metrics(self): - metrics_url = "http://localhost:8002/metrics" + metrics_url = f"http://{_tritonserver_ipaddr}:8002/metrics" r = requests.get(metrics_url) r.raise_for_status() return r.text @@ -305,7 +305,9 @@ def test_triton_grpc_error_cancel(self): shape = [2, 2] number_of_requests = 1 user_data = UserData() - triton_server_url = "localhost:8001" # Replace with your Triton server address + triton_server_url = ( + f"{_tritonserver_ipaddr}:8001" # Replace with your Triton server address + ) stream_end = False triton_client = grpcclient.InferenceServerClient(triton_server_url) diff --git a/qa/L0_backend_python/response_sender/response_sender_complete_final_test.py b/qa/L0_backend_python/response_sender/response_sender_complete_final_test.py index 386a54e3d3..91860281c4 100644 --- a/qa/L0_backend_python/response_sender/response_sender_complete_final_test.py +++ b/qa/L0_backend_python/response_sender/response_sender_complete_final_test.py @@ -31,6 +31,10 @@ import numpy as np import tritonclient.grpc as grpcclient +# By default, find tritonserver on "localhost", but for windows tests +# we overwrite the IP address with the TRITONSERVER_IPADDR envvar +_tritonserver_ipaddr = os.environ.get("TRITONSERVER_IPADDR", "localhost") + class ResponseSenderTest(unittest.TestCase): def _generate_streaming_callback_and_responses_pair(self): @@ -53,7 +57,7 @@ def test_respond_after_complete_final(self): inputs[0].set_data_from_numpy(input0_np) callback, responses = self._generate_streaming_callback_and_responses_pair() - with grpcclient.InferenceServerClient("localhost:8001") as client: + with grpcclient.InferenceServerClient(f"{_tritonserver_ipaddr}:8001") as client: client.start_stream(callback) client.async_stream_infer(model_name, inputs) client.stop_stream() diff --git a/qa/L0_backend_python/response_sender/response_sender_test.py b/qa/L0_backend_python/response_sender/response_sender_test.py index 81f8c75f2c..855efde92d 100644 --- a/qa/L0_backend_python/response_sender/response_sender_test.py +++ b/qa/L0_backend_python/response_sender/response_sender_test.py @@ -24,12 +24,17 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import os import unittest import numpy as np import tritonclient.grpc as grpcclient from tritonclient.utils import InferenceServerException +# By default, find tritonserver on "localhost", but for windows tests +# we overwrite the IP address with the TRITONSERVER_IPADDR envvar +_tritonserver_ipaddr = os.environ.get("TRITONSERVER_IPADDR", "localhost") + class ResponseSenderTest(unittest.TestCase): _inputs_parameters_zero_response_pre_return = { @@ -148,7 +153,7 @@ def callback(result, error): def _infer_parallel(self, model_name, parallel_inputs): callback, responses = self._generate_streaming_callback_and_responses_pair() - with grpcclient.InferenceServerClient("localhost:8001") as client: + with grpcclient.InferenceServerClient(f"{_tritonserver_ipaddr}:8001") as client: client.start_stream(callback) for inputs in parallel_inputs: client.async_stream_infer(model_name, inputs) diff --git a/qa/L0_backend_python/response_sender/test.sh b/qa/L0_backend_python/response_sender/test.sh index cca7e7acfa..0db42746b3 100755 --- a/qa/L0_backend_python/response_sender/test.sh +++ b/qa/L0_backend_python/response_sender/test.sh @@ -94,8 +94,7 @@ if [ $? -ne 0 ]; then fi set -e -kill $SERVER_PID -wait $SERVER_PID +kill_server # # Test response sender to raise exception on response after complete final flag @@ -125,8 +124,7 @@ if [ $? -ne 0 ]; then fi set -e -kill $SERVER_PID -wait $SERVER_PID +kill_server # # Test async response sender under decoupled / non-decoupled diff --git a/qa/L0_batcher/queue_timeout_test.py b/qa/L0_batcher/queue_timeout_test.py index 886bf52a03..681621ba4b 100755 --- a/qa/L0_batcher/queue_timeout_test.py +++ b/qa/L0_batcher/queue_timeout_test.py @@ -27,6 +27,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import concurrent.futures +import os import time import unittest @@ -34,11 +35,15 @@ import tritonclient.grpc as grpcclient from tritonclient.utils import InferenceServerException +# By default, find tritonserver on "localhost", but for windows tests +# we overwrite the IP address with the TRITONSERVER_IPADDR envvar +_tritonserver_ipaddr = os.environ.get("TRITONSERVER_IPADDR", "localhost") + class TestMaxQueueDelayTimeout(unittest.TestCase): def setUp(self): # Initialize client - self._triton = grpcclient.InferenceServerClient("localhost:8001") + self._triton = grpcclient.InferenceServerClient(f"{_tritonserver_ipaddr}:8001") def _get_inputs(self, batch_size): self.assertIsInstance(batch_size, int) diff --git a/qa/L0_batcher/test.sh b/qa/L0_batcher/test.sh index 7043aab2a5..2eed5e3f13 100755 --- a/qa/L0_batcher/test.sh +++ b/qa/L0_batcher/test.sh @@ -85,6 +85,10 @@ if [[ -v WSL_DISTRO_NAME ]] || [[ -v MSYSTEM ]]; then BACKEND_DIR=${BACKEND_DIR:=C:/tritonserver/backends} SERVER=${SERVER:=/mnt/c/tritonserver/bin/tritonserver.exe} export WSLENV=$WSLENV:TRITONSERVER_DELAY_SCHEDULER + TEST_WINDOWS=1 + # DLIS-7683 This test fails performance-related response time parameters + # when using HTTP protocol. Use gRPC protocol for now as a WAR. + export USE_GRPC=1 else MODELDIR=${MODELDIR:=`pwd`} DATADIR=${DATADIR:="/data/inferenceserver/${REPO_VERSION}"} @@ -601,7 +605,7 @@ done TEST_CASE=test_multi_batch_preserve_ordering # Skip test for Windows. Trace file concats at 8192 chars on Windows. -if [[ ! -v WSL_DISTRO_NAME ]] || [[ ! -v MSYSTEM ]]; then +if [ $TEST_WINDOWS -eq 0 ]; then rm -fr ./custom_models && mkdir ./custom_models && \ cp -r ../custom_models/custom_zero_1_float32 ./custom_models/. && \ mkdir -p ./custom_models/custom_zero_1_float32/1 @@ -754,7 +758,7 @@ mkdir -p models/dynamic_batch/1 && (cd models/dynamic_batch && \ TEST_LOG="queue_timeout_test.log" SERVER_LOG="./queue_timeout_test.server.log" -SERVER_ARGS="--model-repository=`pwd`/models --log-verbose=2" +SERVER_ARGS="--model-repository=$MODELDIR/models --log-verbose=2 --backend-directory=${BACKEND_DIR}" run_server if [ "$SERVER_PID" == "0" ]; then echo -e "\n***\n*** Failed to start $SERVER\n***" @@ -771,8 +775,7 @@ if [ $? -ne 0 ]; then fi set -e -kill $SERVER_PID -wait $SERVER_PID +kill_server if [ $RET -eq 0 ]; then echo -e "\n***\n*** Test Passed\n***" diff --git a/qa/L0_trt_plugin/test.sh b/qa/L0_trt_plugin/test.sh index a9d04331f0..31d444c933 100755 --- a/qa/L0_trt_plugin/test.sh +++ b/qa/L0_trt_plugin/test.sh @@ -53,6 +53,7 @@ if [[ -v WSL_DISTRO_NAME ]] || [[ -v MSYSTEM ]]; then CUSTOMPLUGIN=${CUSTOMPLUGIN:=$MODELDIR/HardmaxPlugin.dll} BACKEND_DIR=${BACKEND_DIR:=C:/tritonserver/backends} SERVER=${SERVER:=/mnt/c/tritonserver/bin/tritonserver.exe} + TEST_WINDOWS=1 else DATADIR=${DATADIR:="/data/inferenceserver/${REPO_VERSION}"} MODELDIR=${MODELDIR:=`pwd`/models} @@ -135,7 +136,8 @@ SERVER_LD_PRELOAD=$CUSTOMPLUGIN SERVER_ARGS=$SERVER_ARGS_BASE SERVER_LOG="./inference_server_$LOG_IDX.log" -if [[ ! -v WSL_DISTRO_NAME ]] || [[ ! -v MSYSTEM ]]; then +# Skip test for Windows +if [ $TEST_WINDOWS -eq 0 ]; then run_server if [ "$SERVER_PID" == "0" ]; then echo -e "\n***\n*** Failed to start $SERVER\n***" diff --git a/qa/common/infer_util.py b/qa/common/infer_util.py index 1827452336..edaf0ede47 100755 --- a/qa/common/infer_util.py +++ b/qa/common/infer_util.py @@ -701,7 +701,7 @@ def inferAndCheckResults( ) if cidx == expected0_sort_idx[b][idx]: tester.assertEqual( - ctuple[2], + ctuple[2].strip("\r"), "label{}".format(expected0_sort_idx[b][idx]), ) elif result_name == OUTPUT1: