diff --git a/python/test/test_api.py b/python/test/test_api.py index 0f4a22a94..a6adde27d 100644 --- a/python/test/test_api.py +++ b/python/test/test_api.py @@ -345,11 +345,6 @@ def test_ready(self): server = tritonserver.Server(self._server_options).start() self.assertTrue(server.ready()) - @pytest.mark.xfail( - tritonserver.__version__ <= "2.46.0", - reason="Known issue on stop: Exit timeout expired. Exiting immediately", - raises=tritonserver.InternalError, - ) def test_stop(self): server = tritonserver.Server(self._server_options).start(wait_until_ready=True) @@ -455,15 +450,20 @@ def test_basic_inference(self): "bool_input": numpy.random.rand(1, 100).astype(dtype=numpy.bool_), } - for response in server.model("test").infer( + response_iterator = server.model("test").infer( inputs=inputs, output_memory_type="cpu", raise_on_error=True, - ): - for input_name, input_value in inputs.items(): - output_value = response.outputs[input_name.replace("input", "output")] - output_value = numpy.from_dlpack(output_value) - numpy.testing.assert_array_equal(input_value, output_value) + ) + + responses = list(response_iterator) + self.assertEqual(len(responses), 1) + response = responses[0] + + for input_name, input_value in inputs.items(): + output_value = response.outputs[input_name.replace("input", "output")] + output_value = numpy.from_dlpack(output_value) + numpy.testing.assert_array_equal(input_value, output_value) # test normal bool inputs = {"bool_input": [[True, False, False, True]]} diff --git a/python/tritonserver/_api/_response.py b/python/tritonserver/_api/_response.py index 8dc505298..60418dc64 100644 --- a/python/tritonserver/_api/_response.py +++ b/python/tritonserver/_api/_response.py @@ -29,7 +29,6 @@ from __future__ import annotations import asyncio -import inspect import queue from dataclasses import asdict, dataclass, field from typing import TYPE_CHECKING, Optional @@ -182,9 +181,6 @@ def _response_callback(self, response, flags, unused): 
asyncio.run_coroutine_threadsafe( self._user_queue.put(response), self._loop ) - if flags == TRITONSERVER_ResponseCompleteFlag.FINAL: - del self._request - self._request = None except Exception as e: message = f"Catastrophic failure in response callback: {e}" LogMessage(LogLevel.ERROR, message) @@ -308,9 +304,6 @@ def _response_callback(self, response, flags, unused): self._queue.put(response) if self._user_queue is not None: self._user_queue.put(response) - if flags == TRITONSERVER_ResponseCompleteFlag.FINAL: - del self._request - self._request = None except Exception as e: message = f"Catastrophic failure in response callback: {e}" LogMessage(LogLevel.ERROR, message)