Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add test on rate limiter max resource decrease update #5885

Merged
merged 3 commits into from
Jun 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 58 additions & 1 deletion qa/L0_model_update/instance_update_test.py
tanmayv25 marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,7 @@ def test_infer_while_updating(self):
self.__unload_model()

# Test instance resource requirement increase
@unittest.skipUnless(os.environ["RATE_LIMIT_MODE"] == "execution_count",
@unittest.skipUnless("execution_count" in os.environ["RATE_LIMIT_MODE"],
"Rate limiter precondition not met for this test")
def test_instance_resource_increase(self):
# Load model
tanmayv25 marked this conversation as resolved.
Show resolved Hide resolved
Expand Down Expand Up @@ -368,6 +368,63 @@ def infer():
# Unload model
self.__unload_model()

# Test instance resource requirement increase above explicit resource
@unittest.skipIf(
    os.environ["RATE_LIMIT_MODE"] != "execution_count_with_explicit_resource",
    "Rate limiter precondition not met for this test")
def test_instance_resource_increase_above_explicit(self):
    """Check an update asking for more of R1 than the explicit limit fails.

    Only runs when RATE_LIMIT_MODE is
    "execution_count_with_explicit_resource". The model is loaded with an
    instance group requiring 2 of resource "R1"; an update requesting 32
    must raise InferenceServerException, after which an update requesting
    10 (the explicit resource amount) is applied and the model is unloaded.
    """
    initial_config = "{\ncount: 1\nkind: KIND_CPU\nrate_limiter {\nresources [\n{\nname: \"R1\"\ncount: 2\n}\n]\n}\n}"
    excessive_config = "{\ncount: 1\nkind: KIND_CPU\nrate_limiter {\nresources [\n{\nname: \"R1\"\ncount: 32\n}\n]\n}\n}"
    matching_config = "{\ncount: 1\nkind: KIND_CPU\nrate_limiter {\nresources [\n{\nname: \"R1\"\ncount: 10\n}\n]\n}\n}"

    # Start from a requirement well below the explicit resource.
    self.__load_model(1, initial_config)

    # Requesting more than the explicit resource must be rejected.
    with self.assertRaises(InferenceServerException):
        self.__update_instance_count(0, 0, excessive_config)

    # A requirement equal to the explicit resource is accepted.
    self.__update_instance_count(1, 1, matching_config)

    self.__unload_model()

# Test instance resource requirement decrease
@unittest.skipIf("execution_count" not in os.environ["RATE_LIMIT_MODE"],
                 "Rate limiter precondition not met for this test")
def test_instance_resource_decrease(self):
    """Check how lowering an instance's R1 requirement shows up in the log.

    Only runs when RATE_LIMIT_MODE contains "execution_count". The model is
    loaded requiring 4 of resource "R1", updated to require 3, and
    unloaded. The server log under MODEL_LOG_DIR is then searched for the
    line reporting R1 with count 3:

    * RATE_LIMIT_MODE == "execution_count": the line must be present,
      showing the max resource dropped from 4 to 3.
    * any other mode that passes the skip check (explicit limit of 10):
      the line must be absent.
    """
    before_config = "{\ncount: 1\nkind: KIND_CPU\nrate_limiter {\nresources [\n{\nname: \"R1\"\ncount: 4\n}\n]\n}\n}"
    after_config = "{\ncount: 1\nkind: KIND_CPU\nrate_limiter {\nresources [\n{\nname: \"R1\"\ncount: 3\n}\n]\n}\n}"

    self.__load_model(1, before_config)
    self.__update_instance_count(1, 1, after_config)
    self.__unload_model()

    # A count of 3 for R1 appears nowhere else in this test, so finding (or
    # not finding) it in the server output tells us whether the max
    # resource was actually lowered.
    time.sleep(1)  # give the server a moment to write out its log

    log_dir = os.environ["MODEL_LOG_DIR"]
    rate_limit_mode = os.environ["RATE_LIMIT_MODE"]
    log_name = "instance_update_test.rate_limit_{}.server.log".format(
        rate_limit_mode)
    log_path = os.path.join(log_dir, log_name)

    with open(log_path, mode="r", encoding="utf-8", errors="strict") as f:
        server_output = f.read()

    decreased_marker = "Resource: R1\t Count: 3"
    if rate_limit_mode != "execution_count":
        # With an explicit limit of 10 the max resource is never set to 3.
        self.assertNotIn(decreased_marker, server_output)
    else:
        # With no explicit limit the previous max of 4 is reduced to 3.
        self.assertIn(decreased_marker, server_output)

# Test for instance update on direct sequence scheduling
@unittest.skip("Sequence will not continue after update [FIXME: DLIS-4820]")
def test_instance_update_on_direct_sequence_scheduling(self):
Expand Down
20 changes: 17 additions & 3 deletions qa/L0_model_update/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -55,15 +55,20 @@ function setup_models() {

RET=0

# Test model instance update with and without rate limiting enabled
for RATE_LIMIT_MODE in "off" "execution_count"; do
# Test model instance update with rate limiting on/off and explicit resource
for RATE_LIMIT_MODE in "off" "execution_count" "execution_count_with_explicit_resource"; do

RATE_LIMIT_ARGS="--rate-limit=$RATE_LIMIT_MODE"
if [ "$RATE_LIMIT_MODE" == "execution_count_with_explicit_resource" ]; then
RATE_LIMIT_ARGS="--rate-limit=execution_count --rate-limit-resource=R1:10"
fi

export RATE_LIMIT_MODE=$RATE_LIMIT_MODE
TEST_LOG="instance_update_test.rate_limit_$RATE_LIMIT_MODE.log"
SERVER_LOG="./instance_update_test.rate_limit_$RATE_LIMIT_MODE.server.log"

setup_models
SERVER_ARGS="--model-repository=models --model-control-mode=explicit --rate-limit=$RATE_LIMIT_MODE --log-verbose=2"
SERVER_ARGS="--model-repository=models --model-control-mode=explicit $RATE_LIMIT_ARGS --log-verbose=2"
run_server
if [ "$SERVER_PID" == "0" ]; then
echo -e "\n***\n*** Failed to start $SERVER\n***"
Expand All @@ -83,6 +88,15 @@ for RATE_LIMIT_MODE in "off" "execution_count"; do
kill $SERVER_PID
wait $SERVER_PID

set +e
grep "Should not print this" $SERVER_LOG
if [ $? -eq 0 ]; then
echo -e "\n***\n*** Found \"Should not print this\" on \"$SERVER_LOG\"\n***"
cat $SERVER_LOG
RET=1
fi
set -e

done

if [ $RET -eq 0 ]; then
Expand Down