From 73c7726336a5f4958d526c6224fb15b9690ad84e Mon Sep 17 00:00:00 2001
From: krishung5
Date: Wed, 8 Nov 2023 01:47:09 -0800
Subject: [PATCH] Revert test changes

---
 qa/L0_backend_python/python_unittest.py      |   2 +-
 .../request_rescheduling/test.sh             |   7 -
 .../bls_request_rescheduling/model.py        |  54 +------
 .../request_rescheduling_cases/config.pbtxt  |  51 ------
 .../request_rescheduling_cases/model.py      | 147 ------------------
 5 files changed, 6 insertions(+), 255 deletions(-)
 delete mode 100644 qa/python_models/request_rescheduling_cases/config.pbtxt
 delete mode 100644 qa/python_models/request_rescheduling_cases/model.py

diff --git a/qa/L0_backend_python/python_unittest.py b/qa/L0_backend_python/python_unittest.py
index a00ee1cb994..c956412f9de 100755
--- a/qa/L0_backend_python/python_unittest.py
+++ b/qa/L0_backend_python/python_unittest.py
@@ -76,7 +76,7 @@ def test_python_unittest(self):
             self._run_unittest(model_name)
 
             # [FIXME] See DLIS-3684
-            # self._run_unittest(model_name)
+            self._run_unittest(model_name)
             with self._shm_leak_detector.Probe() as shm_probe:
                 self._run_unittest(model_name)
         else:
diff --git a/qa/L0_backend_python/request_rescheduling/test.sh b/qa/L0_backend_python/request_rescheduling/test.sh
index b290c90bb1a..cecf2b2812b 100755
--- a/qa/L0_backend_python/request_rescheduling/test.sh
+++ b/qa/L0_backend_python/request_rescheduling/test.sh
@@ -39,9 +39,6 @@ RET=0
 
 rm -fr *.log ./models *.txt
 
-pip3 uninstall -y torch
-pip3 install torch==1.13.0+cu117 -f https://download.pytorch.org/whl/torch_stable.html
-
 mkdir -p models/bls_request_rescheduling/1/
 cp ../../python_models/bls_request_rescheduling/model.py models/bls_request_rescheduling/1/
 cp ../../python_models/bls_request_rescheduling/config.pbtxt models/bls_request_rescheduling
@@ -54,10 +51,6 @@ mkdir -p models/generative_sequence/1/
 cp ../../python_models/generative_sequence/model.py models/generative_sequence/1/
 cp ../../python_models/generative_sequence/config.pbtxt models/generative_sequence
 
-mkdir -p models/request_rescheduling_cases/1/
-cp ../../python_models/request_rescheduling_cases/model.py models/request_rescheduling_cases/1/
-cp ../../python_models/request_rescheduling_cases/config.pbtxt models/request_rescheduling_cases
-
 mkdir -p models/wrong_return_type/1/
 cp ../../python_models/wrong_return_type/model.py models/wrong_return_type/1/
 cp ../../python_models/wrong_return_type/config.pbtxt models/wrong_return_type
diff --git a/qa/python_models/bls_request_rescheduling/model.py b/qa/python_models/bls_request_rescheduling/model.py
index af630a4e838..5599618c71d 100644
--- a/qa/python_models/bls_request_rescheduling/model.py
+++ b/qa/python_models/bls_request_rescheduling/model.py
@@ -24,6 +24,7 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+import time
 import unittest
 
 import numpy as np
@@ -78,6 +79,10 @@ def test_decoupled_e2e(self):
         model_name = "generative_sequence"
         # Reload the model to reset the flag for multiple iterations
         pb_utils.unload_model(model_name)
+        # TODO: Make this more robust to wait until fully unloaded
+        print("Sleep 10 seconds to make sure model finishes unloading...", flush=True)
+        time.sleep(10)
+        print("Done sleeping.", flush=True)
         pb_utils.load_model(model_name)
 
         input_value = 3
@@ -102,55 +107,6 @@ def test_decoupled_e2e(self):
                 self.assertEqual(expected_output, output0.as_numpy()[0])
                 expected_output -= 1
 
-    def test_send_final_flag_before_rescheduling_request(self):
-        model_name = "request_rescheduling_cases"
-        # Reload the model to reset the flag for multiple iterations
-        pb_utils.unload_model(model_name)
-        pb_utils.load_model(model_name)
-
-        case_value = 0
-        input0 = pb_utils.Tensor("IN", np.array([case_value], dtype=np.int32))
-        infer_request = pb_utils.InferenceRequest(
-            model_name=model_name,
-            inputs=[input0],
-            requested_output_names=["OUT"],
-        )
-        infer_responses = infer_request.exec(decoupled=True)
-        for infer_response in infer_responses:
-            self.assertFalse(infer_response.has_error())
-
-            if len(infer_response.output_tensors()) > 0:
-                output0 = pb_utils.get_output_tensor_by_name(infer_response, "OUT")
-                self.assertIsNotNone(output0)
-
-                self.assertEqual(case_value, output0.as_numpy()[0])
-
-    def test_process_request_in_different_thread(self):
-        model_name = "request_rescheduling_cases"
-        # Reload the model to reset the flag for multiple iterations
-        pb_utils.unload_model(model_name)
-        pb_utils.load_model(model_name)
-
-        case_value = 1
-        input0 = pb_utils.Tensor("IN", np.array([case_value], dtype=np.int32))
-        infer_request = pb_utils.InferenceRequest(
-            model_name=model_name,
-            inputs=[input0],
-            requested_output_names=["OUT"],
-        )
-        infer_responses = infer_request.exec(decoupled=True)
-
-        expected_output = case_value
-        for infer_response in infer_responses:
-            self.assertFalse(infer_response.has_error())
-
-            if len(infer_response.output_tensors()) > 0:
-                output0 = pb_utils.get_output_tensor_by_name(infer_response, "OUT")
-                self.assertIsNotNone(output0)
-
-                self.assertEqual(expected_output, output0.as_numpy()[0])
-                expected_output -= 1
-
 
 class TritonPythonModel:
     def execute(self, requests):
diff --git a/qa/python_models/request_rescheduling_cases/config.pbtxt b/qa/python_models/request_rescheduling_cases/config.pbtxt
deleted file mode 100644
index 19b6db68f37..00000000000
--- a/qa/python_models/request_rescheduling_cases/config.pbtxt
+++ /dev/null
@@ -1,51 +0,0 @@
-# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#  * Redistributions of source code must retain the above copyright
-#    notice, this list of conditions and the following disclaimer.
-#  * Redistributions in binary form must reproduce the above copyright
-#    notice, this list of conditions and the following disclaimer in the
-#    documentation and/or other materials provided with the distribution.
-#  * Neither the name of NVIDIA CORPORATION nor the names of its
-#    contributors may be used to endorse or promote products derived
-#    from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
-# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-name: "request_rescheduling_cases"
-backend: "python"
-max_batch_size: 0
-model_transaction_policy {
-  decoupled: True
-}
-input [
-  {
-    name: "IN"
-    data_type: TYPE_INT32
-    dims: [ 1 ]
-  }
-]
-output [
-  {
-    name: "OUT"
-    data_type: TYPE_INT32
-    dims: [ 1 ]
-  }
-]
-sequence_batching {
-  generative_sequence : true
-}
-
-instance_group [{ kind: KIND_CPU }]
diff --git a/qa/python_models/request_rescheduling_cases/model.py b/qa/python_models/request_rescheduling_cases/model.py
deleted file mode 100644
index c23d889fd7b..00000000000
--- a/qa/python_models/request_rescheduling_cases/model.py
+++ /dev/null
@@ -1,147 +0,0 @@
-# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#  * Redistributions of source code must retain the above copyright
-#    notice, this list of conditions and the following disclaimer.
-#  * Redistributions in binary form must reproduce the above copyright
-#    notice, this list of conditions and the following disclaimer in the
-#    documentation and/or other materials provided with the distribution.
-#  * Neither the name of NVIDIA CORPORATION nor the names of its
-#    contributors may be used to endorse or promote products derived
-#    from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
-# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import json
-import threading
-
-import numpy as np
-import triton_python_backend_utils as pb_utils
-
-
-class TritonPythonModel:
-    def initialize(self, args):
-        self.model_config = model_config = json.loads(args["model_config"])
-
-        using_decoupled = pb_utils.using_decoupled_model_transaction_policy(
-            model_config
-        )
-        if not using_decoupled:
-            raise pb_utils.TritonModelException(
-                """the model `{}` can generate any number of responses per request,
-                enable decoupled transaction policy in model configuration to
-                serve this model""".format(
-                    args["model_name"]
-                )
-            )
-
-        # Get IN configuration
-        in_config = pb_utils.get_input_config_by_name(model_config, "IN")
-
-        # Validate the shape and data type of IN
-        in_shape = in_config["dims"]
-        if (len(in_shape) != 1) or (in_shape[0] != 1):
-            raise pb_utils.TritonModelException(
-                """the model `{}` requires the shape of 'IN' to be
-                [1], got {}""".format(
-                    args["model_name"], in_shape
-                )
-            )
-        if in_config["data_type"] != "TYPE_INT32":
-            raise pb_utils.TritonModelException(
-                """the model `{}` requires the data_type of 'IN' to be
-                'TYPE_INT32', got {}""".format(
-                    args["model_name"], in_config["data_type"]
-                )
-            )
-
-        # Get OUT configuration
-        out_config = pb_utils.get_output_config_by_name(model_config, "OUT")
-
-        # Validate the shape and data type of OUT
-        out_shape = out_config["dims"]
-        if (len(out_shape) != 1) or (out_shape[0] != 1):
-            raise pb_utils.TritonModelException(
-                """the model `{}` requires the shape of 'OUT' to be
-                [1], got {}""".format(
-                    args["model_name"], out_shape
-                )
-            )
-        if out_config["data_type"] != "TYPE_INT32":
-            raise pb_utils.TritonModelException(
-                """the model `{}` requires the data_type of 'OUT' to be
-                'TYPE_INT32', got {}""".format(
-                    args["model_name"], out_config["data_type"]
-                )
-            )
-
-        self.idx = 0
-        self.inflight_thread_count = 0
-        self.inflight_thread_count_lck = threading.Lock()
-
-    def execute(self, requests):
-        for request in requests:
-            case = pb_utils.get_input_tensor_by_name(request, "IN").as_numpy()
-
-            if case[0] == 0:
-                self.send_final_flag_before_rescheduling_request(request)
-            elif case[0] == 1:
-                self.process_request_thread(request)
-            else:
-                raise pb_utils.TritonModelException("Unknown test case.")
-
-        return None
-
-    def send_final_flag_before_rescheduling_request(self, request):
-        response_sender = request.get_response_sender()
-        if self.idx == 0:
-            out_output = pb_utils.Tensor("OUT", np.array([0], np.int32))
-            response = pb_utils.InferenceResponse(output_tensors=[out_output])
-            response_sender.send(response)
-            response_sender.send(flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)
-            request.set_release_flags(pb_utils.TRITONSERVER_REQUEST_RELEASE_RESCHEDULE)
-            self.idx = 1
-
-    def process_request_thread(self, request):
-        thread = threading.Thread(
-            target=self.response_thread,
-            args=(
-                request.get_response_sender(),
-                pb_utils.get_input_tensor_by_name(request, "IN").as_numpy(),
-            ),
-        )
-
-        thread.daemon = True
-
-        with self.inflight_thread_count_lck:
-            self.inflight_thread_count += 1
-
-        if self.idx == 0:
-            request.set_release_flags(pb_utils.TRITONSERVER_REQUEST_RELEASE_RESCHEDULE)
-        thread.start()
-        self.idx = 1
-
-    def response_thread(self, response_sender, in_input):
-        output_value = in_input[0]
-        while output_value >= 0:
-            out_output = pb_utils.Tensor("OUT", np.array([output_value], np.int32))
-            response = pb_utils.InferenceResponse(output_tensors=[out_output])
-            response_sender.send(response)
-            output_value -= 1
-
-        response_sender.send(flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)
-
-        with self.inflight_thread_count_lck:
-            self.inflight_thread_count -= 1