From 03f37f1892eea871dfee7c3e4cc60d4c9fae418b Mon Sep 17 00:00:00 2001
From: tanmayv25
Date: Tue, 7 Nov 2023 20:02:52 -0800
Subject: [PATCH 1/3] Enhance testing for pending request count

---
 qa/L0_metrics/metrics_queue_size_test.py | 43 +++++++++++++++++++-----
 qa/L0_metrics/test.sh                    |  6 +++-
 2 files changed, 40 insertions(+), 9 deletions(-)

diff --git a/qa/L0_metrics/metrics_queue_size_test.py b/qa/L0_metrics/metrics_queue_size_test.py
index 18a601d01b..583d86cb26 100755
--- a/qa/L0_metrics/metrics_queue_size_test.py
+++ b/qa/L0_metrics/metrics_queue_size_test.py
@@ -83,7 +83,10 @@ def setUp(self):
             url=self.server_url, concurrency=self.concurrency
         )
 
-    def _validate_model_config(self, model_name):
+        # Test specific configurations
+        self.max_queue_size = 0
+
+    def _validate_model_config(self, model_name, max_queue_size = 0):
         config = self.client.get_model_config(model_name)
         print(config)
         params = config.get("parameters", {})
@@ -91,6 +94,13 @@ def _validate_model_config(self, model_name, max_queue_size = 0):
         max_batch_size = config.get("max_batch_size")
         self.assertEqual(delay_ms, self.delay_ms)
         self.assertEqual(max_batch_size, self.max_batch_size)
+
+        dynamic_batching = config.get("dynamic_batching", {})
+        default_queue_policy = dynamic_batching.get("default_queue_policy", {})
+        self.max_queue_size = default_queue_policy.get("max_queue_size", 0)
+
+        self.assertEqual(self.max_queue_size, max_queue_size)
+
         return config
 
     def _get_metrics(self):
@@ -148,8 +158,8 @@ def _send_async_requests_sequence(self, num_seq_slots, model_name, inputs, futur
             )
             num_sent += 1
 
-    def _test_helper(self, model_name, batch_size, send_requests_func):
-        self._validate_model_config(model_name)
+    def _test_helper(self, model_name, batch_size, send_requests_func, max_queue_size = 0):
+        self._validate_model_config(model_name, max_queue_size=max_queue_size)
 
         queue_size = QUEUE_METRIC_TEMPLATE.format(model_name=model_name)
         infer_count = INFER_METRIC_TEMPLATE.format(model_name=model_name)
@@ -162,7 +172,12 @@ def _test_helper(self, model_name, batch_size, send_requests_func):
 
         # Give Triton a second to load all requests into queues
         time.sleep(1)
-        starting_queue_size = self.num_requests - batch_size
+        if max_queue_size != 0:
+            self._assert_metric_equals(queue_size, max_queue_size)
+            starting_queue_size = max_queue_size
+        else:
+            starting_queue_size = self.num_requests - batch_size
+
         # Start from (num_requests-batch_size) because 1 batch should be executing,
         # and the rest of the requests should be queued.
         for expected_queue_size in range(starting_queue_size, 0, -1 * batch_size):
@@ -174,13 +189,20 @@ def _test_helper(self, model_name, batch_size, send_requests_func, max_queue_siz
         time.sleep(self.delay_sec)
 
         # All requests should've been executed without any batching
-        self._assert_metric_equals(infer_count, self.num_requests)
-        expected_exec_count = math.ceil(self.num_requests / batch_size)
+        expected_infer_count = starting_queue_size + batch_size
+        self._assert_metric_equals(infer_count, expected_infer_count)
+        expected_exec_count = math.ceil(expected_infer_count/ batch_size)
         self._assert_metric_equals(exec_count, expected_exec_count)
 
-        # Verify no inference exceptions were raised
+        failed_count = 0
         for future in futures:
-            future.get_result()
+            try:
+                future.get_result()
+            except Exception as e:
+                failed_count = failed_count + 1
+
+        self.assertEqual(failed_count, self.num_requests - batch_size - starting_queue_size)
+
 
     def test_default_scheduler(self):
         model_name = "default"
@@ -194,6 +216,11 @@ def test_dynamic_batch_scheduler(self):
         batch_size = self.max_batch_size
         self._test_helper(model_name, batch_size, self._send_async_requests)
 
+    def test_fail_max_queue_size(self):
+        model_name = "max_queue_size"
+        batch_size = self.max_batch_size
+        self._test_helper(model_name, batch_size, self._send_async_requests, max_queue_size=4)
+
     def test_sequence_batch_scheduler_direct(self):
         model_name = "sequence_direct"
         # With sufficient queue delay and minimum_slot_utilization set, we
diff --git a/qa/L0_metrics/test.sh b/qa/L0_metrics/test.sh
index 9b0f634b82..dea1c62041 100755
--- a/qa/L0_metrics/test.sh
+++ b/qa/L0_metrics/test.sh
@@ -325,6 +325,10 @@ DYNAMIC_MODEL="${MODELDIR}/dynamic"
 cp -r "${DEFAULT_MODEL}" "${DYNAMIC_MODEL}"
 echo -e "\ndynamic_batching { max_queue_delay_microseconds: ${MAX_QUEUE_DELAY_US} }\n" >> "${DYNAMIC_MODEL}/config.pbtxt"
 
+MAX_QUEUE_SIZE_MODEL="${MODELDIR}/max_queue_size"
+cp -r "${DEFAULT_MODEL}" "${MAX_QUEUE_SIZE_MODEL}"
+echo -e "\ndynamic_batching { max_queue_delay_microseconds: ${MAX_QUEUE_DELAY_US} default_queue_policy { max_queue_size: 4 } }\n" >> "${MAX_QUEUE_SIZE_MODEL}/config.pbtxt"
+
 SEQUENCE_DIRECT_MODEL="${MODELDIR}/sequence_direct"
 cp -r "${DEFAULT_MODEL}" "${SEQUENCE_DIRECT_MODEL}"
 echo -e "\nsequence_batching { direct { max_queue_delay_microseconds: ${MAX_QUEUE_DELAY_US}, minimum_slot_utilization: 1.0 } }\n" >> "${SEQUENCE_DIRECT_MODEL}/config.pbtxt"
@@ -347,7 +351,7 @@ run_and_check_server
 python3 ${PYTHON_TEST} 2>&1 | tee ${CLIENT_LOG}
 kill $SERVER_PID
 wait $SERVER_PID
-expected_tests=5
+expected_tests=6
 check_unit_test "${expected_tests}"
 
 if [ $RET -eq 0 ]; then

From 5bf3676e87e0298dcbb590df42c25381386e7a27 Mon Sep 17 00:00:00 2001
From: tanmayv25
Date: Tue, 7 Nov 2023 20:13:42 -0800
Subject: [PATCH 2/3] Improve the documentation

---
 qa/L0_metrics/metrics_queue_size_test.py | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/qa/L0_metrics/metrics_queue_size_test.py b/qa/L0_metrics/metrics_queue_size_test.py
index 583d86cb26..e992e06597 100755
--- a/qa/L0_metrics/metrics_queue_size_test.py
+++ b/qa/L0_metrics/metrics_queue_size_test.py
@@ -86,7 +86,7 @@ def setUp(self):
         # Test specific configurations
         self.max_queue_size = 0
 
-    def _validate_model_config(self, model_name, max_queue_size = 0):
+    def _validate_model_config(self, model_name, max_queue_size=0):
         config = self.client.get_model_config(model_name)
         print(config)
         params = config.get("parameters", {})
@@ -158,7 +158,9 @@ def _send_async_requests_sequence(self, num_seq_slots, model_name, inputs, futur
             )
             num_sent += 1
 
-    def _test_helper(self, model_name, batch_size, send_requests_func, max_queue_size = 0):
+    def _test_helper(
+        self, model_name, batch_size, send_requests_func, max_queue_size=0
+    ):
         self._validate_model_config(model_name, max_queue_size=max_queue_size)
 
         queue_size = QUEUE_METRIC_TEMPLATE.format(model_name=model_name)
@@ -172,14 +174,16 @@ def _test_helper(self, model_name, batch_size, send_requests_func, max_queue_siz
 
         # Give Triton a second to load all requests into queues
         time.sleep(1)
+        # Start from (num_requests-batch_size) because 1 batch should be executing,
+        # and the rest of the requests should be queued.
+        # If max_queue_size is specified then the queued requests would be capped
+        # at max_queue_size.
        if max_queue_size != 0:
             self._assert_metric_equals(queue_size, max_queue_size)
             starting_queue_size = max_queue_size
         else:
             starting_queue_size = self.num_requests - batch_size
 
-        # Start from (num_requests-batch_size) because 1 batch should be executing,
-        # and the rest of the requests should be queued.
         for expected_queue_size in range(starting_queue_size, 0, -1 * batch_size):
             self._assert_metric_equals(queue_size, expected_queue_size)
             time.sleep(self.delay_sec)
@@ -191,7 +195,7 @@ def _test_helper(self, model_name, batch_size, send_requests_func, max_queue_siz
         # All requests should've been executed without any batching
         expected_infer_count = starting_queue_size + batch_size
         self._assert_metric_equals(infer_count, expected_infer_count)
-        expected_exec_count = math.ceil(expected_infer_count/ batch_size)
+        expected_exec_count = math.ceil(expected_infer_count / batch_size)
         self._assert_metric_equals(exec_count, expected_exec_count)
 
         failed_count = 0
@@ -201,8 +205,9 @@ def _test_helper(self, model_name, batch_size, send_requests_func, max_queue_siz
             except Exception as e:
                 failed_count = failed_count + 1
 
-        self.assertEqual(failed_count, self.num_requests - batch_size - starting_queue_size)
-
+        self.assertEqual(
+            failed_count, self.num_requests - batch_size - starting_queue_size
+        )
 
     def test_default_scheduler(self):
         model_name = "default"
@@ -219,7 +224,9 @@ def test_dynamic_batch_scheduler(self):
     def test_fail_max_queue_size(self):
         model_name = "max_queue_size"
         batch_size = self.max_batch_size
-        self._test_helper(model_name, batch_size, self._send_async_requests, max_queue_size=4)
+        self._test_helper(
+            model_name, batch_size, self._send_async_requests, max_queue_size=4
+        )
 
     def test_sequence_batch_scheduler_direct(self):
         model_name = "sequence_direct"

From 43cee98fa5145cbd54f988884e7c9585937e2899 Mon Sep 17 00:00:00 2001
From: tanmayv25
Date: Wed, 8 Nov 2023 14:24:22 -0800
Subject: [PATCH 3/3] Add more documentation

---
 qa/L0_metrics/metrics_queue_size_test.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/qa/L0_metrics/metrics_queue_size_test.py b/qa/L0_metrics/metrics_queue_size_test.py
index e992e06597..0554274109 100755
--- a/qa/L0_metrics/metrics_queue_size_test.py
+++ b/qa/L0_metrics/metrics_queue_size_test.py
@@ -223,6 +223,10 @@ def test_dynamic_batch_scheduler(self):
 
     def test_fail_max_queue_size(self):
         model_name = "max_queue_size"
+        # This test checks whether metrics properly account for requests
+        # that fail to enqueue on the server. The test sets the max_queue_size,
+        # and any additional requests beyond the specified queue size should fail
+        # instead of waiting for execution.
         batch_size = self.max_batch_size
         self._test_helper(
             model_name, batch_size, self._send_async_requests, max_queue_size=4
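
The expected-count arithmetic behind test_fail_max_queue_size can be worked through by hand. The standalone Python sketch below (separate from the patches above) mirrors the accounting the patched _test_helper asserts; the concrete num_requests and max_batch_size values are illustrative assumptions only, since the real test takes them from its setUp() defaults and the model config, while max_queue_size: 4 matches the default_queue_policy appended in test.sh.

import math

# Assumed values for illustration only; the actual test derives these from
# its setUp() defaults and the model's config.pbtxt.
num_requests = 10    # requests sent asynchronously by the test
max_batch_size = 4   # model max_batch_size
max_queue_size = 4   # default_queue_policy { max_queue_size: 4 } set in test.sh

# One full batch is picked up for execution immediately, the queue holds at
# most max_queue_size requests, and every further request is rejected.
starting_queue_size = max_queue_size
expected_infer_count = starting_queue_size + max_batch_size                  # 8
expected_exec_count = math.ceil(expected_infer_count / max_batch_size)       # 2
expected_failed_count = num_requests - max_batch_size - starting_queue_size  # 2

print(expected_infer_count, expected_exec_count, expected_failed_count)

With these assumed numbers, one batch of 4 executes right away, 4 requests wait in the queue, and the remaining 2 are rejected; those expressions are the same ones the patched _test_helper checks against the inference/execution count metrics and the failed-request count.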