From 66779753d7effd0e239f9edf844d89d588afa6f7 Mon Sep 17 00:00:00 2001 From: kthui <18255193+kthui@users.noreply.github.com> Date: Wed, 31 May 2023 19:31:33 -0700 Subject: [PATCH 1/3] Add test on rate limiter max resource decrease update --- qa/L0_model_update/instance_update_test.py | 18 ++++++++++++++++-- qa/L0_model_update/test.sh | 9 +++++++++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/qa/L0_model_update/instance_update_test.py b/qa/L0_model_update/instance_update_test.py index 5fa0eb3071..421292310f 100644 --- a/qa/L0_model_update/instance_update_test.py +++ b/qa/L0_model_update/instance_update_test.py @@ -338,10 +338,10 @@ def test_infer_while_updating(self): # Unload model self.__unload_model() - # Test instance resource requirement increase + # Test instance resource requirement update @unittest.skipUnless(os.environ["RATE_LIMIT_MODE"] == "execution_count", "Rate limiter precondition not met for this test") - def test_instance_resource_increase(self): + def test_instance_resource_update(self): # Load model self.__load_model( 1, @@ -365,6 +365,20 @@ def infer(): time.sleep(infer_count / 2) # each infer should take < 0.5 seconds self.assertNotIn(False, infer_complete, "Infer possibly stuck") infer_thread.result() + # Decrease the resource requirement + self.__update_instance_count( + 1, 1, + "{\ncount: 1\nkind: KIND_CPU\nrate_limiter {\nresources [\n{\nname: \"R1\"\ncount: 6\n}\n]\n}\n}" + ) + # Further decrease the resource requirement. The previous decrease + # should have lower the max resource in the rate limiter, which the + # error "Should not print this ..." should not be printed into the + # server log because the max resource is above the previously set limit + # and it will be checked by the main bash test script. + self.__update_instance_count( + 1, 1, + "{\ncount: 1\nkind: KIND_CPU\nrate_limiter {\nresources [\n{\nname: \"R1\"\ncount: 4\n}\n]\n}\n}" + ) # Unload model self.__unload_model() diff --git a/qa/L0_model_update/test.sh b/qa/L0_model_update/test.sh index 9246ef7ae1..5a4eb09d69 100755 --- a/qa/L0_model_update/test.sh +++ b/qa/L0_model_update/test.sh @@ -83,6 +83,15 @@ for RATE_LIMIT_MODE in "off" "execution_count"; do kill $SERVER_PID wait $SERVER_PID + set +e + grep "Should not print this" $SERVER_LOG + if [ $? -eq 0 ]; then + echo -e "\n***\n*** Found \"Should not print this\" on \"$SERVER_LOG\"\n***" + cat $SERVER_LOG + RET=1 + fi + set -e + done if [ $RET -eq 0 ]; then From 95c92f7df2b6f77886669d3e19fc2b293d1a0b1e Mon Sep 17 00:00:00 2001 From: kthui <18255193+kthui@users.noreply.github.com> Date: Sun, 4 Jun 2023 17:32:55 -0700 Subject: [PATCH 2/3] Add test with explicit resource --- qa/L0_model_update/instance_update_test.py | 49 +++++++++++++++++----- qa/L0_model_update/test.sh | 11 +++-- 2 files changed, 46 insertions(+), 14 deletions(-) diff --git a/qa/L0_model_update/instance_update_test.py b/qa/L0_model_update/instance_update_test.py index 421292310f..2c36e4e413 100644 --- a/qa/L0_model_update/instance_update_test.py +++ b/qa/L0_model_update/instance_update_test.py @@ -338,10 +338,10 @@ def test_infer_while_updating(self): # Unload model self.__unload_model() - # Test instance resource requirement update - @unittest.skipUnless(os.environ["RATE_LIMIT_MODE"] == "execution_count", + # Test instance resource requirement increase + @unittest.skipUnless("execution_count" in os.environ["RATE_LIMIT_MODE"], "Rate limiter precondition not met for this test") - def test_instance_resource_update(self): + def test_instance_resource_increase(self): # Load model self.__load_model( 1, @@ -365,19 +365,46 @@ def infer(): time.sleep(infer_count / 2) # each infer should take < 0.5 seconds self.assertNotIn(False, infer_complete, "Infer possibly stuck") infer_thread.result() - # Decrease the resource requirement + # Unload model + self.__unload_model() + + # Test instance resource requirement increase above explicit resource + @unittest.skipUnless(os.environ["RATE_LIMIT_MODE"] == + "execution_count_with_explicit_resource", + "Rate limiter precondition not met for this test") + def test_instance_resource_increase_above_explicit(self): + # Load model + self.__load_model( + 1, + "{\ncount: 1\nkind: KIND_CPU\nrate_limiter {\nresources [\n{\nname: \"R1\"\ncount: 2\n}\n]\n}\n}" + ) + # Increase resource requirement + with self.assertRaises(InferenceServerException): + self.__update_instance_count( + 0, 0, + "{\ncount: 1\nkind: KIND_CPU\nrate_limiter {\nresources [\n{\nname: \"R1\"\ncount: 32\n}\n]\n}\n}" + ) + # Correct the resource requirement to match the explicit resource self.__update_instance_count( 1, 1, - "{\ncount: 1\nkind: KIND_CPU\nrate_limiter {\nresources [\n{\nname: \"R1\"\ncount: 6\n}\n]\n}\n}" + "{\ncount: 1\nkind: KIND_CPU\nrate_limiter {\nresources [\n{\nname: \"R1\"\ncount: 10\n}\n]\n}\n}" ) - # Further decrease the resource requirement. The previous decrease - # should have lower the max resource in the rate limiter, which the - # error "Should not print this ..." should not be printed into the - # server log because the max resource is above the previously set limit - # and it will be checked by the main bash test script. + # Unload model + self.__unload_model() + + # Test instance resource requirement decrease + @unittest.skipUnless("execution_count" in os.environ["RATE_LIMIT_MODE"], + "Rate limiter precondition not met for this test") + def test_instance_resource_decrease(self): + # Load model + self.__load_model( + 1, + "{\ncount: 1\nkind: KIND_CPU\nrate_limiter {\nresources [\n{\nname: \"R1\"\ncount: 4\n}\n]\n}\n}" + ) + # Decrease resource requirement self.__update_instance_count( 1, 1, - "{\ncount: 1\nkind: KIND_CPU\nrate_limiter {\nresources [\n{\nname: \"R1\"\ncount: 4\n}\n]\n}\n}" + "{\ncount: 1\nkind: KIND_CPU\nrate_limiter {\nresources [\n{\nname: \"R1\"\ncount: 2\n}\n]\n}\n}" ) # Unload model self.__unload_model() diff --git a/qa/L0_model_update/test.sh b/qa/L0_model_update/test.sh index 5a4eb09d69..7f8c23e38a 100755 --- a/qa/L0_model_update/test.sh +++ b/qa/L0_model_update/test.sh @@ -55,15 +55,20 @@ function setup_models() { RET=0 -# Test model instance update with and without rate limiting enabled -for RATE_LIMIT_MODE in "off" "execution_count"; do +# Test model instance update with rate limiting on/off and explicit resource +for RATE_LIMIT_MODE in "off" "execution_count" "execution_count_with_explicit_resource"; do + + RATE_LIMIT_ARGS="--rate-limit=$RATE_LIMIT_MODE" + if [ "$RATE_LIMIT_MODE" == "execution_count_with_explicit_resource" ]; then + RATE_LIMIT_ARGS="--rate-limit=execution_count --rate-limit-resource=R1:10" + fi export RATE_LIMIT_MODE=$RATE_LIMIT_MODE TEST_LOG="instance_update_test.rate_limit_$RATE_LIMIT_MODE.log" SERVER_LOG="./instance_update_test.rate_limit_$RATE_LIMIT_MODE.server.log" setup_models - SERVER_ARGS="--model-repository=models --model-control-mode=explicit --rate-limit=$RATE_LIMIT_MODE --log-verbose=2" + SERVER_ARGS="--model-repository=models --model-control-mode=explicit $RATE_LIMIT_ARGS --log-verbose=2" run_server if [ "$SERVER_PID" == "0" ]; then echo -e "\n***\n*** Failed to start $SERVER\n***" From fbdf0ce682e449d5f77d2816c97cba33219c4844 Mon Sep 17 00:00:00 2001 From: kthui <18255193+kthui@users.noreply.github.com> Date: Thu, 8 Jun 2023 19:30:04 -0700 Subject: [PATCH 3/3] Check server log for decreased resource limit --- qa/L0_model_update/instance_update_test.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/qa/L0_model_update/instance_update_test.py b/qa/L0_model_update/instance_update_test.py index 2c36e4e413..e849b8a70e 100644 --- a/qa/L0_model_update/instance_update_test.py +++ b/qa/L0_model_update/instance_update_test.py @@ -404,10 +404,26 @@ def test_instance_resource_decrease(self): # Decrease resource requirement self.__update_instance_count( 1, 1, - "{\ncount: 1\nkind: KIND_CPU\nrate_limiter {\nresources [\n{\nname: \"R1\"\ncount: 2\n}\n]\n}\n}" + "{\ncount: 1\nkind: KIND_CPU\nrate_limiter {\nresources [\n{\nname: \"R1\"\ncount: 3\n}\n]\n}\n}" ) # Unload model self.__unload_model() + # The resource count of 3 is unique across this entire test, so check + # the server output to make sure it is printed, which ensures the + # max resource is actually decreased. + time.sleep(1) # make sure the log file is updated + log_path = os.path.join( + os.environ["MODEL_LOG_DIR"], "instance_update_test.rate_limit_" + + os.environ["RATE_LIMIT_MODE"] + ".server.log") + with open(log_path, mode="r", encoding="utf-8", errors="strict") as f: + if os.environ["RATE_LIMIT_MODE"] == "execution_count": + # Make sure the previous max resource limit of 4 is reduced to 3 + # when no explicit limit is set. + self.assertIn("Resource: R1\t Count: 3", f.read()) + else: + # Make sure the max resource limit is never set to 3 when + # explicit limit of 10 is set. + self.assertNotIn("Resource: R1\t Count: 3", f.read()) # Test for instance update on direct sequence scheduling @unittest.skip("Sequence will not continue after update [FIXME: DLIS-4820]")