Test Python BLS with different sizes of CUDA memory pool (#6276)
* Test with different sizes of CUDA memory pool

* Check the server log for error message

* Improve debugging

* Fix syntax
krishung5 authored Oct 25, 2023
1 parent 2b1d267 commit b5c2e38
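
The pool size handed to the server is a byte count in Triton's `--cuda-memory-pool-byte-size=<device-id>:<bytes>` option. A minimal sketch of the derivation the test performs, in Python for illustration (the script below does the same in shell arithmetic; GPU device 0 is assumed, matching the script):

# Sketch: how each trial's server flag is derived from a pool size in MB.
for pool_mb in (64, 128):
    pool_bytes = pool_mb * 1024 * 1024  # MiB -> bytes
    flag = f"--cuda-memory-pool-byte-size=0:{pool_bytes}"
    print(flag)  # e.g. --cuda-memory-pool-byte-size=0:67108864 for 64 MB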
Showing 2 changed files with 87 additions and 75 deletions.
138 changes: 75 additions & 63 deletions qa/L0_backend_python/bls/test.sh
@@ -34,7 +34,6 @@ source ../../common/util.sh
TRITON_DIR=${TRITON_DIR:="/opt/tritonserver"}
SERVER=${TRITON_DIR}/bin/tritonserver
BACKEND_DIR=${TRITON_DIR}/backends
-SERVER_ARGS="--model-repository=`pwd`/models --backend-directory=${BACKEND_DIR} --log-verbose=1"

RET=0
# This variable is used to print out the correct server log for each sub-test.
@@ -103,96 +102,109 @@ cp -r ${DATADIR}/qa_model_repository/libtorch_nobatch_float32_float32_float32/ .
sed -i 's/libtorch_nobatch_float32_float32_float32/libtorch_cpu/' models/libtorch_cpu/config.pbtxt && \
echo "instance_group [ { kind: KIND_CPU} ]" >> models/libtorch_cpu/config.pbtxt

+# Test with different sizes of CUDA memory pool
+for CUDA_MEMORY_POOL_SIZE_MB in 64 128 ; do
+  CUDA_MEMORY_POOL_SIZE_BYTES=$((CUDA_MEMORY_POOL_SIZE_MB * 1024 * 1024))
+  SERVER_ARGS="--model-repository=`pwd`/models --backend-directory=${BACKEND_DIR} --log-verbose=1 --cuda-memory-pool-byte-size=0:${CUDA_MEMORY_POOL_SIZE_BYTES}"
  for TRIAL in non_decoupled decoupled ; do
    export BLS_KIND=$TRIAL
-    SERVER_LOG="./bls_$TRIAL.inference_server.log"
+    SERVER_LOG="./bls_$TRIAL.$CUDA_MEMORY_POOL_SIZE_MB.inference_server.log"

    run_server
    if [ "$SERVER_PID" == "0" ]; then
      echo -e "\n***\n*** Failed to start $SERVER\n***"
      cat $SERVER_LOG
      exit 1
    fi

    set +e

    export MODEL_NAME='bls'
    python3 $CLIENT_PY >> $CLIENT_LOG 2>&1
    if [ $? -ne 0 ]; then
      echo -e "\n***\n*** 'bls' $BLS_KIND test FAILED. \n***"
      cat $CLIENT_LOG
      RET=1
      SUB_TEST_RET=1
    else
      check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
      if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
        SUB_TEST_RET=1
      fi
    fi

    export MODEL_NAME='bls_memory'
    python3 $CLIENT_PY >> $CLIENT_LOG 2>&1
    if [ $? -ne 0 ]; then
      echo -e "\n***\n*** 'bls_memory' $BLS_KIND test FAILED. \n***"
      cat $CLIENT_LOG
      RET=1
      SUB_TEST_RET=1
    else
      check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
      if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
        SUB_TEST_RET=1
      fi
    fi

    export MODEL_NAME='bls_memory_async'
    python3 $CLIENT_PY >> $CLIENT_LOG 2>&1
    if [ $? -ne 0 ]; then
      echo -e "\n***\n*** 'bls_async_memory' $BLS_KIND test FAILED. \n***"
      cat $CLIENT_LOG
      RET=1
      SUB_TEST_RET=1
    else
      check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
      if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
        SUB_TEST_RET=1
      fi
    fi

    export MODEL_NAME='bls_async'
    python3 $CLIENT_PY >> $CLIENT_LOG 2>&1
    if [ $? -ne 0 ]; then
      echo -e "\n***\n*** 'bls_async' $BLS_KIND test FAILED. \n***"
      cat $CLIENT_LOG
      RET=1
      SUB_TEST_RET=1
    else
      check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
      if [ $? -ne 0 ]; then
        cat $CLIENT_LOG
        echo -e "\n***\n*** Test Result Verification Failed\n***"
        RET=1
        SUB_TEST_RET=1
      fi
    fi

    set -e

    kill $SERVER_PID
    wait $SERVER_PID

    if [ $SUB_TEST_RET -eq 1 ]; then
      cat $CLIENT_LOG
      cat $SERVER_LOG
    fi

+    if [[ $CUDA_MEMORY_POOL_SIZE_MB -eq 128 ]]; then
+      if [ `grep -c "Failed to allocate memory from CUDA memory pool" $SERVER_LOG` != "0" ]; then
+        echo -e "\n***\n*** Expected to use CUDA memory pool for all tests when CUDA_MEMORY_POOL_SIZE_MB is 128 MB for 'bls' $BLS_KIND test\n***"
+        cat $SERVER_LOG
+        RET=1
+      fi
+    fi
  done
+done

# Test error handling when BLS is used in "initialize" or "finalize" function
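For context on what these trials exercise: the `bls*` models issue nested inference requests from inside the Triton Python backend, and GPU output tensors handed back to the caller are allocated from the server's CUDA memory pool. A minimal sketch of that round trip, assuming the Python backend environment (`triton_python_backend_utils` exists only inside a Python backend model, and the downstream model name here is hypothetical):

import triton_python_backend_utils as pb_utils
from torch.utils.dlpack import from_dlpack

def bls_gpu_roundtrip(input_tensor):
    # Issue a nested (BLS) request against a downstream model.
    infer_request = pb_utils.InferenceRequest(
        model_name="dlpack_add_sub",  # hypothetical downstream model name
        inputs=[input_tensor],
        requested_output_names=["OUTPUT0"],
    )
    infer_response = infer_request.exec()
    # Same assertion style the commit adopts in model.py below: surface
    # the error text rather than a bare boolean failure.
    assert not infer_response.has_error(), infer_response.error()

    output = pb_utils.get_output_tensor_by_name(infer_response, "OUTPUT0")
    if not output.is_cpu():
        # A GPU-resident output was allocated from the CUDA memory pool.
        # When the pool is too small, the server logs "Failed to allocate
        # memory from CUDA memory pool", the message the 128 MB branch
        # above greps for.
        output = from_dlpack(output.to_dlpack()).to("cpu")
    return output

With a 64 MB pool the test tolerates that fallback message; with 128 MB it treats any occurrence as a failure, on the expectation that the pool is then large enough for every BLS allocation in the suite.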
24 changes: 12 additions & 12 deletions qa/python_models/bls/model.py
@@ -220,7 +220,7 @@ def _send_bls_sequence_requests(self, correlation_id, is_decoupled):
            infer_request.flags(), pb_utils.TRITONSERVER_REQUEST_FLAG_SEQUENCE_START
        )
        infer_response = infer_request.exec()
-        self.assertFalse(infer_response.has_error())
+        self.assertFalse(infer_response.has_error(), infer_response.error())
        output = pb_utils.get_output_tensor_by_name(infer_response, "OUTPUT")
        self.assertFalse(output.is_cpu())
        output = from_dlpack(output.to_dlpack()).to("cpu").cpu().detach().numpy()
@@ -242,7 +242,7 @@ def _send_bls_sequence_requests(self, correlation_id, is_decoupled):
                next(infer_responses)
            else:
                infer_response = infer_request.exec()
-            self.assertFalse(infer_response.has_error())
+            self.assertFalse(infer_response.has_error(), infer_response.error())

            # The new output is the previous output + the current input
            expected_output = output[0] + i
@@ -275,7 +275,7 @@ def _send_bls_sequence_requests(self, correlation_id, is_decoupled):
            else:
                infer_response = infer_request.exec()

-            self.assertFalse(infer_response.has_error())
+            self.assertFalse(infer_response.has_error(), infer_response.error())
            expected_output = output[0] + input.as_numpy()[0]
            output = pb_utils.get_output_tensor_by_name(infer_response, "OUTPUT")
            self.assertFalse(output.is_cpu())
@@ -345,7 +345,7 @@ def _get_gpu_bls_outputs(self, input0_pb, input1_pb, is_decoupled):
        else:
            infer_response = infer_request.exec()

-        self.assertFalse(infer_response.has_error())
+        self.assertFalse(infer_response.has_error(), infer_response.error())

        output0 = pb_utils.get_output_tensor_by_name(infer_response, "OUTPUT0")
        output1 = pb_utils.get_output_tensor_by_name(infer_response, "OUTPUT1")
@@ -401,7 +401,7 @@ def test_zero_length_io(self):
        else:
            infer_response = infer_request.exec()

-        self.assertFalse(infer_response.has_error())
+        self.assertFalse(infer_response.has_error(), infer_response.error())

        output0 = pb_utils.get_output_tensor_by_name(infer_response, "OUTPUT0")
        self.assertTrue(np.all(output0 == input0))
@@ -431,7 +431,7 @@ def test_bls_tensor_lifecycle(self):
                next(infer_responses)
            else:
                infer_response = infer_request.exec()
-            self.assertFalse(infer_response.has_error())
+            self.assertFalse(infer_response.has_error(), infer_response.error())

            output0 = pb_utils.get_output_tensor_by_name(infer_response, "OUTPUT0")
            np.testing.assert_equal(
@@ -472,7 +472,7 @@ def test_bls_tensor_lifecycle(self):
            else:
                infer_response = infer_request.exec()

-            self.assertFalse(infer_response.has_error())
+            self.assertFalse(infer_response.has_error(), infer_response.error())

            output0 = pb_utils.get_output_tensor_by_name(infer_response, "OUTPUT0")
            output0_pytorch = from_dlpack(output0.to_dlpack())
@@ -638,7 +638,7 @@ def _test_response_iterator_square(
        expected_output_cnt = np.array([expected_output_cnt], dtype=np.int32)

        for infer_response in response_iterator:
-            self.assertFalse(infer_response.has_error())
+            self.assertFalse(infer_response.has_error(), infer_response.error())
            if len(infer_response.output_tensors()) > 0:
                output0 = pb_utils.get_output_tensor_by_name(infer_response, "OUT")
                self.assertIsNotNone(output0)
@@ -671,7 +671,7 @@ def test_response_iterator(self):
        # case 1. Use Next() to get the next response first, then use
        # for-loop to get the remaining responses.
        infer_response = next(infer_responses)
-        self.assertFalse(infer_response.has_error())
+        self.assertFalse(infer_response.has_error(), infer_response.error())
        output0 = pb_utils.get_output_tensor_by_name(infer_response, "OUT")
        self.assertIsNotNone(output0)
        self.assertEqual(response_value, output0.as_numpy())
@@ -695,7 +695,7 @@ def test_response_iterator(self):
        # get the remaining responses.
        response_count = 0
        for infer_response in infer_responses:
-            self.assertFalse(infer_response.has_error())
+            self.assertFalse(infer_response.has_error(), infer_response.error())
            output0 = pb_utils.get_output_tensor_by_name(infer_response, "OUT")
            self.assertIsNotNone(output0)
            self.assertEqual(response_value, output0.as_numpy())
@@ -705,7 +705,7 @@ def test_response_iterator(self):
                break

        infer_response = next(infer_responses)
-        self.assertFalse(infer_response.has_error())
+        self.assertFalse(infer_response.has_error(), infer_response.error())
        output0 = pb_utils.get_output_tensor_by_name(infer_response, "OUT")
        self.assertIsNotNone(output0)
        self.assertEqual(response_value, output0.as_numpy())
@@ -720,7 +720,7 @@ def test_response_iterator(self):
        infer_responses = infer_request.exec(decoupled=True)

        infer_response = next(infer_responses)
-        self.assertFalse(infer_response.has_error())
+        self.assertFalse(infer_response.has_error(), infer_response.error())
        output0 = pb_utils.get_output_tensor_by_name(infer_response, "OUT")
        self.assertIsNotNone(output0)
        self.assertEqual(response_value, output0.as_numpy())
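
Every hunk above applies one pattern: pass `infer_response.error()` as the optional assertion message so a failing run reports the underlying BLS error instead of a bare "True is not false". A minimal self-contained sketch of the effect, using plain `unittest` with a stub standing in for the real response object:

import unittest


class _StubResponse:
    """Stand-in for a BLS InferenceResponse that carries an error."""

    def has_error(self):
        return True

    def error(self):
        return "Failed to allocate memory from CUDA memory pool"


class ResponseCheck(unittest.TestCase):
    def test_error_text_is_surfaced(self):
        resp = _StubResponse()
        # assertFalse's second argument becomes the failure message, so the
        # underlying error text lands in the test log.
        with self.assertRaises(AssertionError) as ctx:
            self.assertFalse(resp.has_error(), resp.error())
        self.assertIn("CUDA memory pool", str(ctx.exception))


if __name__ == "__main__":
    unittest.main()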
