Add Redis cache build, tests, and docs #5916

Merged · 6 commits · Jun 8, 2023
Changes from 3 commits
6 changes: 2 additions & 4 deletions build.py
@@ -1768,8 +1768,7 @@ def enable_all():
'tensorrt'
]
all_repoagents = ['checksum']
-# DLIS-4491: Add redis cache to build
-all_caches = ['local']
+all_caches = ['local', 'redis']
all_filesystems = ['gcs', 's3', 'azure_storage']
all_endpoints = ['http', 'grpc', 'sagemaker', 'vertex-ai']

@@ -1787,8 +1786,7 @@ def enable_all():
'openvino', 'tensorrt'
]
all_repoagents = ['checksum']
-# DLIS-4491: Add redis cache to build
-all_caches = ['local']
+all_caches = ['local', 'redis']
all_filesystems = []
all_endpoints = ['http', 'grpc']

38 changes: 33 additions & 5 deletions docs/user_guide/response_cache.md
@@ -105,6 +105,7 @@ For tags `>=23.03`,
[tritonserver release containers](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tritonserver)
come with the following cache implementations out of the box:
- [local](https://github.com/triton-inference-server/local_cache): `/opt/tritonserver/caches/local/libtritoncache_local.so`
- [redis](https://github.com/triton-inference-server/redis_cache): `/opt/tritonserver/caches/redis/libtritoncache_redis.so`

With these TRITONCACHE APIs, `tritonserver` exposes a new `--cache-config`
CLI flag that gives the user flexible customization of which cache implementation
Expand All @@ -124,18 +125,44 @@ When `--cache-config local,size=SIZE` is specified with a non-zero `SIZE`,
Triton allocates the requested size in CPU memory and **shares the
cache across all inference requests and across all models**.
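As a quick sketch (the size value and model repository path are illustrative assumptions, not from this PR), enabling the local cache might look like:

```shell
# Enable the local response cache with a 1 MiB CPU-memory allocation.
# The size and model repository path are illustrative assumptions.
tritonserver --model-repository=/models --cache-config=local,size=1048576
```

Since the cache is shared across all models, this single `size` value bounds the total cache memory for the server.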

#### Redis Cache

The `redis` cache implementation enables Triton to communicate with a
Redis server for caching. The `redis_cache` implementation is essentially
a Redis client that acts as an intermediary between Triton and Redis.

To list a few benefits of the `redis` cache compared to the `local` cache in
the context of Triton:
- The Redis server can be hosted remotely as long as it is accessible by Triton,
so it is not tied directly to the Triton process lifetime.
- This means Triton can be restarted and still have access to previously cached entries.
- This also means that Triton doesn't have to compete with the cache for memory/resource usage.
- Multiple Triton instances can share a cache by configuring each Triton instance
to communicate with the same Redis server.
- The Redis server can be updated/restarted independently of Triton, and
Triton will fall back to operating as it would with no cache access during
any Redis server downtime, logging appropriate errors.

In general, the Redis server can be configured/deployed as needed for your use
case, and Triton's `redis` cache will simply act as a client of your Redis
deployment. The [Redis docs](https://redis.io/docs/) should be consulted for
questions and details about configuring the Redis server.

For Triton-specific `redis` cache implementation details/configuration, see the
[redis cache implementation](https://github.com/triton-inference-server/redis_cache).
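As a sketch (the host and port values are assumptions; substitute your own deployment's address), a Redis-backed cache is selected with the same `--cache-config` flag used elsewhere in this PR's tests:

```shell
# Point Triton's response cache at a Redis server.
# localhost/6379 are illustrative; use your Redis deployment's host and port.
tritonserver --model-repository=/models \
    --cache-config=redis,host=localhost \
    --cache-config=redis,port=6379
```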

#### Custom Cache

With the new TRITONCACHE API interface, it is now possible for
users to implement their own cache to suit any use-case specific needs.
To see the required interface that must be implemented by a cache
developer, see the
[TRITONCACHE API header](https://github.com/triton-inference-server/core/blob/main/include/triton/core/tritoncache.h).
-The `local` cache implementation may be used as a reference implementation.
+The `local` or `redis` cache implementations may be used as reference.

Upon successfully developing and building a custom cache, the resulting shared
library (ex: `libtritoncache_<name>.so`) must be placed in the cache directory
-similar to where the `local` cache implementation lives. By default,
+similar to where the `local` and `redis` cache implementations live. By default,
this directory is `/opt/tritonserver/caches`, but a custom directory may be
specified with `--cache-dir` as needed.
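For instance, installing a hypothetical custom cache named `mycache` (all names and paths below are illustrative assumptions, not from this PR) might look like:

```shell
# Place the custom cache's shared library where Triton discovers caches.
# "mycache" and the library filename are hypothetical.
mkdir -p /opt/tritonserver/caches/mycache
cp libtritoncache_mycache.so /opt/tritonserver/caches/mycache/

# Select it at startup, by analogy with the local/redis caches.
tritonserver --model-repository=/models --cache-config=mycache,key=value
```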

@@ -184,9 +211,10 @@ a response.
For cases where cache hits are common and computation is expensive,
the cache can significantly improve overall performance.

-For cases where all or most requests are unique (cache misses), the
-cache may negatively impact the overall performance due to the overhead
-of managing the cache.
+For cases where most requests are unique (cache misses) or the compute is
+fast/cheap (the model is not compute-bound), the cache can negatively impact
+the overall performance due to the overhead of managing and communicating with
+the cache.
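One way to quantify this tradeoff is to benchmark the same model with and without a cache configured, for example with Triton's `perf_analyzer` client. This is a sketch: the model name is hypothetical, and exact flags may vary by version.

```shell
# Run the server once with and once without a cache, e.g.:
#   tritonserver --model-repository=/models --cache-config=local,size=1048576
#   tritonserver --model-repository=/models
# Then compare latency/throughput against each server (model name assumed):
perf_analyzer -m my_model --concurrency-range 1:4
```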

## Known Limitations

134 changes: 94 additions & 40 deletions qa/L0_response_cache/test.sh
@@ -29,12 +29,54 @@ RET=0

TEST_LOG="./response_cache_test.log"
UNIT_TEST=./response_cache_test
export CUDA_VISIBLE_DEVICES=0

# Only localhost supported in this test for now, but in future could make
# use of a persistent remote redis server, or similarly use --replicaof arg.
export TRITON_REDIS_HOST="localhost"
export TRITON_REDIS_PORT="6379"

rm -fr *.log

# UNIT TEST
function install_redis() {
## Install redis if not already installed
if ! command -v redis-server >/dev/null 2>&1; then
apt update -y && apt install -y redis
fi
}

function start_redis() {
# Run redis server in background
redis-server --daemonize yes --port "${TRITON_REDIS_PORT}"

# Check redis server is running
REDIS_PING_RESPONSE=$(redis-cli -h ${TRITON_REDIS_HOST} -p ${TRITON_REDIS_PORT} ping)
if [ "${REDIS_PING_RESPONSE}" == "PONG" ]; then
echo "Redis successfully started in background"
else
echo -e "\n***\n*** Failed: Redis server did not start successfully\n***"
RET=1
fi
}
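# Sketch (an editor's assumption, not part of this PR): start_redis checks
# readiness with a single ping; a retry helper like this could harden the
# check on slow CI machines.
function wait_for_ready() {
    local retries="$1"; shift
    # Retry the given command until it succeeds or retries are exhausted
    while [ "${retries}" -gt 0 ]; do
        if "$@" > /dev/null 2>&1; then
            return 0
        fi
        retries=$((retries - 1))
        sleep 1
    done
    return 1
}
# Hypothetical usage:
#   wait_for_ready 10 redis-cli -h "${TRITON_REDIS_HOST}" -p "${TRITON_REDIS_PORT}" ping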

function stop_redis() {
echo "Stopping Redis server..."
redis-cli -h "${TRITON_REDIS_HOST}" -p "${TRITON_REDIS_PORT}" shutdown > /dev/null 2>&1 || true
echo "Redis server shutdown"
}

# UNIT TESTS
set +e
export CUDA_VISIBLE_DEVICES=0

## Unit tests currently run for both the Local and Redis cache implementations
## by default. However, we could break out the unit tests for each
## into separate runs with gtest filters if needed in the future:
## - `${UNIT_TEST} --gtest_filter=*Local*`
## - `${UNIT_TEST} --gtest_filter=*Redis*`
install_redis
# Stop any existing redis server first for good measure
stop_redis
start_redis
LD_LIBRARY_PATH=/opt/tritonserver/lib:$LD_LIBRARY_PATH $UNIT_TEST >>$TEST_LOG 2>&1
if [ $? -ne 0 ]; then
cat $TEST_LOG
@@ -48,10 +90,33 @@ function check_server_success_and_kill {
if [ "${SERVER_PID}" == "0" ]; then
echo -e "\n***\n*** Failed to start ${SERVER}\n***"
cat ${SERVER_LOG}
-exit 1
+RET=1
+else
+kill ${SERVER_PID}
+wait ${SERVER_PID}
fi
}

function check_server_expected_failure {
EXPECTED_MESSAGE="${1}"
if [ "${SERVER_PID}" != "0" ]; then
echo -e "\n***\n*** Failed: ${SERVER} started successfully when it was expected to fail\n***"
cat ${SERVER_LOG}
RET=1

kill ${SERVER_PID}
wait ${SERVER_PID}
else
# Check that server fails with the correct error message
set +e
grep -i "${EXPECTED_MESSAGE}" ${SERVER_LOG}
if [ $? -ne 0 ]; then
echo -e "\n***\n*** Failed: Expected [${EXPECTED_MESSAGE}] error message in output\n***"
cat $SERVER_LOG
RET=1
fi
set -e
fi
}

MODEL_DIR="${PWD}/models"
@@ -102,46 +167,35 @@ check_server_success_and_kill
# Test that specifying multiple cache types is not supported and should fail
SERVER_ARGS="--model-repository=${MODEL_DIR} --cache-config=local,size=8192 --cache-config=redis,key=value ${EXTRA_ARGS}"
run_server
-if [ "$SERVER_PID" != "0" ]; then
-echo -e "\n***\n*** Failed: $SERVER started successfully when it was expected to fail\n***"
-cat $SERVER_LOG
-RET=1
-
-kill $SERVER_PID
-wait $SERVER_PID
-else
-# Check that server fails with the correct error message
-set +e
-grep -i "multiple cache configurations" ${SERVER_LOG}
-if [ $? -ne 0 ]; then
-echo -e "\n***\n*** Failed: Expected multiple cache configuration error message in output\n***"
-cat $SERVER_LOG
-RET=1
-fi
-set -e
-fi
+check_server_expected_failure "multiple cache configurations"

# Test that specifying both config styles is incompatible and should fail
SERVER_ARGS="--model-repository=${MODEL_DIR} --response-cache-byte-size=12345 --cache-config=local,size=67890 ${EXTRA_ARGS}"
run_server
-if [ "$SERVER_PID" != "0" ]; then
-echo -e "\n***\n*** Failed: $SERVER started successfully when it was expected to fail\n***"
-cat $SERVER_LOG
-RET=1
-
-kill $SERVER_PID
-wait $SERVER_PID
-else
-# Check that server fails with the correct error message
-set +e
-grep -i "incompatible flags" ${SERVER_LOG}
-if [ $? -ne 0 ]; then
-echo -e "\n***\n*** Failed: Expected incompatible cache config flags error message in output\n***"
-cat $SERVER_LOG
-RET=1
-fi
-set -e
-fi
+check_server_expected_failure "incompatible flags"

# Test simple redis cache config succeeds
SERVER_ARGS="--model-repository=${MODEL_DIR} --cache-config=redis,host=${TRITON_REDIS_HOST} --cache-config=redis,port=${TRITON_REDIS_PORT} ${EXTRA_ARGS}"
run_server
check_server_success_and_kill

# Test triton fails to initialize if it can't connect to redis cache
SERVER_ARGS="--model-repository=${MODEL_DIR} --cache-config=redis,host=localhost --cache-config=redis,port=nonexistent ${EXTRA_ARGS}"
run_server
check_server_expected_failure "Failed to connect to Redis: Connection refused"

# Test triton fails to initialize if it can't resolve host for redis cache
SERVER_ARGS="--model-repository=${MODEL_DIR} --cache-config=redis,host=nonexistent --cache-config=redis,port=nonexistent ${EXTRA_ARGS}"
run_server
check_server_expected_failure "Failed to connect to Redis: Temporary failure in name resolution"

# Test triton fails to initialize if minimum required args (host & port) not all provided
SERVER_ARGS="--model-repository=${MODEL_DIR} --cache-config=redis,port=${TRITON_REDIS_PORT} ${EXTRA_ARGS}"
run_server
check_server_expected_failure "Must at a minimum specify"

# Clean up redis server before exiting test
stop_redis

if [ $RET -eq 0 ]; then
echo -e "\n***\n*** Test Passed\n***"
10 changes: 5 additions & 5 deletions qa/common/util.sh
@@ -66,7 +66,7 @@ function wait_for_server_ready() {

local wait_secs=$wait_time_secs
until test $wait_secs -eq 0 ; do
-if ! kill -0 $spid; then
+if ! kill -0 $spid > /dev/null 2>&1; then
echo "=== Server not running."
WAIT_RET=1
return
@@ -147,13 +147,13 @@ function wait_for_model_stable() {
}

function gdb_helper () {
-if ! command -v gdb; then
+if ! command -v gdb > /dev/null 2>&1; then
echo "=== WARNING: gdb not installed"
return
fi

### Server Hang ###
-if kill -0 ${SERVER_PID}; then
+if kill -0 ${SERVER_PID} > /dev/null 2>&1; then
# If server process is still alive, try to get backtrace and core dump from it
GDB_LOG="gdb_bt.${SERVER_PID}.log"
echo -e "=== WARNING: SERVER HANG DETECTED, DUMPING GDB BACKTRACE TO [${PWD}/${GDB_LOG}] ==="
@@ -166,7 +166,7 @@ function gdb_helper () {

### Server Segfaulted ###
# If there are any core dumps locally from a segfault, load them and get a backtrace
-for corefile in $(ls core.*); do
+for corefile in $(ls core.* 2>/dev/null); do
GDB_LOG="${corefile}.log"
echo -e "=== WARNING: SEGFAULT DETECTED, DUMPING GDB BACKTRACE TO [${PWD}/${GDB_LOG}] ==="
gdb -batch ${SERVER} ${corefile} -ex "thread apply all bt" | tee "${corefile}.log" || true;
@@ -204,7 +204,7 @@ function run_server () {
gdb_helper || true

# Cleanup
-kill $SERVER_PID || true
+kill $SERVER_PID > /dev/null 2>&1 || true
SERVER_PID=0
fi
}