From 66779753d7effd0e239f9edf844d89d588afa6f7 Mon Sep 17 00:00:00 2001
From: kthui <18255193+kthui@users.noreply.github.com>
Date: Wed, 31 May 2023 19:31:33 -0700
Subject: [PATCH 1/3] Add test on rate limiter max resource decrease update

---
 qa/L0_model_update/instance_update_test.py | 18 ++++++++++++++++--
 qa/L0_model_update/test.sh                 |  9 +++++++++
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/qa/L0_model_update/instance_update_test.py b/qa/L0_model_update/instance_update_test.py
index 5fa0eb3071..421292310f 100644
--- a/qa/L0_model_update/instance_update_test.py
+++ b/qa/L0_model_update/instance_update_test.py
@@ -338,10 +338,10 @@ def test_infer_while_updating(self):
         # Unload model
         self.__unload_model()
 
-    # Test instance resource requirement increase
+    # Test instance resource requirement update
     @unittest.skipUnless(os.environ["RATE_LIMIT_MODE"] == "execution_count",
                          "Rate limiter precondition not met for this test")
-    def test_instance_resource_increase(self):
+    def test_instance_resource_update(self):
         # Load model
         self.__load_model(
             1,
@@ -365,6 +365,20 @@ def infer():
             time.sleep(infer_count / 2)  # each infer should take < 0.5 seconds
             self.assertNotIn(False, infer_complete, "Infer possibly stuck")
             infer_thread.result()
+        # Decrease the resource requirement
+        self.__update_instance_count(
+            1, 1,
+            "{\ncount: 1\nkind: KIND_CPU\nrate_limiter {\nresources [\n{\nname: \"R1\"\ncount: 6\n}\n]\n}\n}"
+        )
+        # Further decrease the resource requirement. The previous decrease
+        # should have lower the max resource in the rate limiter, which the
+        # error "Should not print this ..." should not be printed into the
+        # server log because the max resource is above the previously set limit
+        # and it will be checked by the main bash test script.
+        self.__update_instance_count(
+            1, 1,
+            "{\ncount: 1\nkind: KIND_CPU\nrate_limiter {\nresources [\n{\nname: \"R1\"\ncount: 4\n}\n]\n}\n}"
+        )
         # Unload model
         self.__unload_model()
 
diff --git a/qa/L0_model_update/test.sh b/qa/L0_model_update/test.sh
index 9246ef7ae1..5a4eb09d69 100755
--- a/qa/L0_model_update/test.sh
+++ b/qa/L0_model_update/test.sh
@@ -83,6 +83,15 @@ for RATE_LIMIT_MODE in "off" "execution_count"; do
     kill $SERVER_PID
     wait $SERVER_PID
 
+    set +e
+    grep "Should not print this" $SERVER_LOG
+    if [ $? -eq 0 ]; then
+        echo -e "\n***\n*** Found \"Should not print this\" on \"$SERVER_LOG\"\n***"
+        cat $SERVER_LOG
+        RET=1
+    fi
+    set -e
+
 done
 
 if [ $RET -eq 0 ]; then

From 95c92f7df2b6f77886669d3e19fc2b293d1a0b1e Mon Sep 17 00:00:00 2001
From: kthui <18255193+kthui@users.noreply.github.com>
Date: Sun, 4 Jun 2023 17:32:55 -0700
Subject: [PATCH 2/3] Add test with explicit resource

---
 qa/L0_model_update/instance_update_test.py | 49 +++++++++++++++++-----
 qa/L0_model_update/test.sh                 | 11 +++--
 2 files changed, 46 insertions(+), 14 deletions(-)

diff --git a/qa/L0_model_update/instance_update_test.py b/qa/L0_model_update/instance_update_test.py
index 421292310f..2c36e4e413 100644
--- a/qa/L0_model_update/instance_update_test.py
+++ b/qa/L0_model_update/instance_update_test.py
@@ -338,10 +338,10 @@ def test_infer_while_updating(self):
         # Unload model
         self.__unload_model()
 
-    # Test instance resource requirement update
-    @unittest.skipUnless(os.environ["RATE_LIMIT_MODE"] == "execution_count",
+    # Test instance resource requirement increase
+    @unittest.skipUnless("execution_count" in os.environ["RATE_LIMIT_MODE"],
                          "Rate limiter precondition not met for this test")
-    def test_instance_resource_update(self):
+    def test_instance_resource_increase(self):
         # Load model
         self.__load_model(
             1,
@@ -365,19 +365,46 @@ def infer():
             time.sleep(infer_count / 2)  # each infer should take < 0.5 seconds
             self.assertNotIn(False, infer_complete, "Infer possibly stuck")
             infer_thread.result()
-        # Decrease the resource requirement
+        # Unload model
+        self.__unload_model()
+
+    # Test instance resource requirement increase above explicit resource
+    @unittest.skipUnless(os.environ["RATE_LIMIT_MODE"] ==
+                         "execution_count_with_explicit_resource",
+                         "Rate limiter precondition not met for this test")
+    def test_instance_resource_increase_above_explicit(self):
+        # Load model
+        self.__load_model(
+            1,
+            "{\ncount: 1\nkind: KIND_CPU\nrate_limiter {\nresources [\n{\nname: \"R1\"\ncount: 2\n}\n]\n}\n}"
+        )
+        # Increase resource requirement
+        with self.assertRaises(InferenceServerException):
+            self.__update_instance_count(
+                0, 0,
+                "{\ncount: 1\nkind: KIND_CPU\nrate_limiter {\nresources [\n{\nname: \"R1\"\ncount: 32\n}\n]\n}\n}"
+            )
+        # Correct the resource requirement to match the explicit resource
         self.__update_instance_count(
             1, 1,
-            "{\ncount: 1\nkind: KIND_CPU\nrate_limiter {\nresources [\n{\nname: \"R1\"\ncount: 6\n}\n]\n}\n}"
+            "{\ncount: 1\nkind: KIND_CPU\nrate_limiter {\nresources [\n{\nname: \"R1\"\ncount: 10\n}\n]\n}\n}"
         )
-        # Further decrease the resource requirement. The previous decrease
-        # should have lower the max resource in the rate limiter, which the
-        # error "Should not print this ..." should not be printed into the
-        # server log because the max resource is above the previously set limit
-        # and it will be checked by the main bash test script.
+        # Unload model
+        self.__unload_model()
+
+    # Test instance resource requirement decrease
+    @unittest.skipUnless("execution_count" in os.environ["RATE_LIMIT_MODE"],
+                         "Rate limiter precondition not met for this test")
+    def test_instance_resource_decrease(self):
+        # Load model
+        self.__load_model(
+            1,
+            "{\ncount: 1\nkind: KIND_CPU\nrate_limiter {\nresources [\n{\nname: \"R1\"\ncount: 4\n}\n]\n}\n}"
+        )
+        # Decrease resource requirement
         self.__update_instance_count(
             1, 1,
-            "{\ncount: 1\nkind: KIND_CPU\nrate_limiter {\nresources [\n{\nname: \"R1\"\ncount: 4\n}\n]\n}\n}"
+            "{\ncount: 1\nkind: KIND_CPU\nrate_limiter {\nresources [\n{\nname: \"R1\"\ncount: 2\n}\n]\n}\n}"
         )
         # Unload model
         self.__unload_model()
diff --git a/qa/L0_model_update/test.sh b/qa/L0_model_update/test.sh
index 5a4eb09d69..7f8c23e38a 100755
--- a/qa/L0_model_update/test.sh
+++ b/qa/L0_model_update/test.sh
@@ -55,15 +55,20 @@ function setup_models() {
 
 RET=0
 
-# Test model instance update with and without rate limiting enabled
-for RATE_LIMIT_MODE in "off" "execution_count"; do
+# Test model instance update with rate limiting on/off and explicit resource
+for RATE_LIMIT_MODE in "off" "execution_count" "execution_count_with_explicit_resource"; do
+
+    RATE_LIMIT_ARGS="--rate-limit=$RATE_LIMIT_MODE"
+    if [ "$RATE_LIMIT_MODE" == "execution_count_with_explicit_resource" ]; then
+        RATE_LIMIT_ARGS="--rate-limit=execution_count --rate-limit-resource=R1:10"
+    fi
 
     export RATE_LIMIT_MODE=$RATE_LIMIT_MODE
     TEST_LOG="instance_update_test.rate_limit_$RATE_LIMIT_MODE.log"
     SERVER_LOG="./instance_update_test.rate_limit_$RATE_LIMIT_MODE.server.log"
 
     setup_models
-    SERVER_ARGS="--model-repository=models --model-control-mode=explicit --rate-limit=$RATE_LIMIT_MODE --log-verbose=2"
+    SERVER_ARGS="--model-repository=models --model-control-mode=explicit $RATE_LIMIT_ARGS --log-verbose=2"
     run_server
     if [ "$SERVER_PID" == "0" ]; then
         echo -e "\n***\n*** Failed to start $SERVER\n***"

From fbdf0ce682e449d5f77d2816c97cba33219c4844 Mon Sep 17 00:00:00 2001
From: kthui <18255193+kthui@users.noreply.github.com>
Date: Thu, 8 Jun 2023 19:30:04 -0700
Subject: [PATCH 3/3] Check server log for decreased resource limit

---
 qa/L0_model_update/instance_update_test.py | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/qa/L0_model_update/instance_update_test.py b/qa/L0_model_update/instance_update_test.py
index 2c36e4e413..e849b8a70e 100644
--- a/qa/L0_model_update/instance_update_test.py
+++ b/qa/L0_model_update/instance_update_test.py
@@ -404,10 +404,26 @@ def test_instance_resource_decrease(self):
         # Decrease resource requirement
         self.__update_instance_count(
             1, 1,
-            "{\ncount: 1\nkind: KIND_CPU\nrate_limiter {\nresources [\n{\nname: \"R1\"\ncount: 2\n}\n]\n}\n}"
+            "{\ncount: 1\nkind: KIND_CPU\nrate_limiter {\nresources [\n{\nname: \"R1\"\ncount: 3\n}\n]\n}\n}"
         )
         # Unload model
         self.__unload_model()
+        # The resource count of 3 is unique across this entire test, so search
+        # the server log for it: its presence (or absence) shows whether the
+        # max resource limit was actually decreased to 3.
+        time.sleep(1)  # make sure the log file is updated
+        log_path = os.path.join(
+            os.environ["MODEL_LOG_DIR"], "instance_update_test.rate_limit_" +
+            os.environ["RATE_LIMIT_MODE"] + ".server.log")
+        with open(log_path, mode="r", encoding="utf-8", errors="strict") as f:
+            if os.environ["RATE_LIMIT_MODE"] == "execution_count":
+                # Make sure the previous max resource limit of 4 is reduced to 3
+                # when no explicit limit is set.
+                self.assertIn("Resource: R1\t Count: 3", f.read())
+            else:
+                # Make sure the max resource limit is never set to 3 when
+                # explicit limit of 10 is set.
+                self.assertNotIn("Resource: R1\t Count: 3", f.read())
 
     # Test for instance update on direct sequence scheduling
     @unittest.skip("Sequence will not continue after update [FIXME: DLIS-4820]")