From a18b84448d4fd73db1edb0b59cd96e0d0f2bfefa Mon Sep 17 00:00:00 2001 From: Ryan McCormick Date: Wed, 7 Jun 2023 14:46:53 -0700 Subject: [PATCH 1/6] Add redis_cache to build.py --- build.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/build.py b/build.py index 9c44f9ac9a..bff27b9939 100755 --- a/build.py +++ b/build.py @@ -1768,8 +1768,7 @@ def enable_all(): 'tensorrt' ] all_repoagents = ['checksum'] - # DLIS-4491: Add redis cache to build - all_caches = ['local'] + all_caches = ['local', 'redis'] all_filesystems = ['gcs', 's3', 'azure_storage'] all_endpoints = ['http', 'grpc', 'sagemaker', 'vertex-ai'] @@ -1787,8 +1786,7 @@ def enable_all(): 'openvino', 'tensorrt' ] all_repoagents = ['checksum'] - # DLIS-4491: Add redis cache to build - all_caches = ['local'] + all_caches = ['local', 'redis'] all_filesystems = [] all_endpoints = ['http', 'grpc'] From c5169c9aab03ad25f7fdce4b07eaca1d1abe0860 Mon Sep 17 00:00:00 2001 From: Ryan McCormick Date: Wed, 7 Jun 2023 15:03:41 -0700 Subject: [PATCH 2/6] Add redis cache docs section --- docs/user_guide/response_cache.md | 38 +++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/docs/user_guide/response_cache.md b/docs/user_guide/response_cache.md index 1d719072d1..41bcd7d34c 100644 --- a/docs/user_guide/response_cache.md +++ b/docs/user_guide/response_cache.md @@ -105,6 +105,7 @@ For tags `>=23.03`, [tritonserver release containers](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tritonserver) come with the following cache implementations out of the box: - [local](https://github.com/triton-inference-server/local_cache): `/opt/tritonserver/caches/local/libtritoncache_local.so` +- [redis](https://github.com/triton-inference-server/redis_cache): `/opt/tritonserver/caches/redis/libtritoncache_redis.so` With these TRITONCACHE APIs, `tritonserver` exposes a new `--cache-config` CLI flag that gives the user flexible customization of which cache implementation @@ -124,6 +125,32 @@ When `--cache-config local,size=SIZE` is specified with a non-zero `SIZE`, Triton allocates the requested size in CPU memory and **shares the cache across all inference requests and across all models**. +#### Redis Cache + +The `redis` cache implementation exposes the ability for Triton to communicate +with a Redis server for caching. The `redis_cache` implementation is essentially +a Redis client that acts as an intermediary between Triton and Redis. + +To list a few benefits of the `redis` cache compared to the `local` cache in +the context of Triton: +- The Redis server can be hosted remotely as long as it is accesible by Triton, + so it is not tied directly to the Triton process lifetime. + - This means Triton can be restarted and still have access to previously cached entries. + - This also means that Triton doesn't have to compete with the cache for memory/resource usage. +- Multiple Triton instances can share a cache by configuring each Triton instance + to communicate with the same Redis server. +- The Redis server can be updated/restarted independently of Triton, and + Triton will fallback to operating as it would with no cache access during + any Redis server downtime, and log appropriate errors. + +In general, the Redis server can be configured/deployed as needed for your use +case, and Triton's `redis` cache will simply act as a client of your Redis +deployment. 
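+
+As a rough sketch (assuming a Redis server already reachable at
+`localhost:6379`, and a placeholder model repository path whose models have
+response caching enabled in their model config), pointing Triton at that
+server looks something like this:
+
+```bash
+# Start a local Redis server in the background (an existing local or remote
+# deployment works the same way from Triton's point of view).
+redis-server --daemonize yes --port 6379
+
+# "host" and "port" are the minimum required keys for the redis cache;
+# a "password" key can also be supplied for servers requiring authentication.
+tritonserver --model-repository=/path/to/model_repository \
+    --cache-config redis,host=localhost \
+    --cache-config redis,port=6379
+```
+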
The [Redis docs](https://redis.io/docs/) should be consulted for +questions and details about configuring the Redis server. + +For Triton-specific `redis` cache implementation details/configuration, see the +[redis cache implementation](https://github.com/triton-inference-server/redis_cache). + #### Custom Cache With the new the TRITONCACHE API interface, it is now possible for @@ -131,11 +158,11 @@ users to implement their own cache to suit any use-case specific needs. To see the required interface that must be implemented by a cache developer, see the [TRITONCACHE API header](https://github.com/triton-inference-server/core/blob/main/include/triton/core/tritoncache.h). -The `local` cache implementation may be used as a reference implementation. +The `local` or `redis` cache implementations may be used as reference. Upon successfully developing and building a custom cache, the resulting shared library (ex: `libtritoncache_.so`) must be placed in the cache directory -similar to where the `local` cache implementation lives. By default, +similar to where the `local` and `redis` cache implementations live. By default, this directory is `/opt/tritonserver/caches`, but a custom directory may be specified with `--cache-dir` as needed. @@ -184,9 +211,10 @@ a response. For cases where cache hits are common and computation is expensive, the cache can significantly improve overall performance. -For cases where all or most requests are unique (cache misses), the -cache may negatively impact the overall performance due to the overhead -of managing the cache. +For cases where most requests are unique (cache misses) or the compute is +fast/cheap (the model is not compute-bound), the cache can negatively impact +the overall performance due to the overhead of managing and communicating with +the cache. ## Known Limitations From 331081874c5b359a1d2c2ba196193edfe43ecfa2 Mon Sep 17 00:00:00 2001 From: Ryan McCormick Date: Wed, 7 Jun 2023 18:20:20 -0700 Subject: [PATCH 3/6] Add redis cache testing, and cleanup log spam in common utils --- qa/L0_response_cache/test.sh | 134 ++++++++++++++++++++++++----------- qa/common/util.sh | 10 +-- 2 files changed, 99 insertions(+), 45 deletions(-) diff --git a/qa/L0_response_cache/test.sh b/qa/L0_response_cache/test.sh index 56dad05794..e2cc7e9c60 100755 --- a/qa/L0_response_cache/test.sh +++ b/qa/L0_response_cache/test.sh @@ -29,12 +29,54 @@ RET=0 TEST_LOG="./response_cache_test.log" UNIT_TEST=./response_cache_test +export CUDA_VISIBLE_DEVICES=0 + +# Only localhost supported in this test for now, but in future could make +# use of a persistent remote redis server, or similarly use --replicaof arg. +export TRITON_REDIS_HOST="localhost" +export TRITON_REDIS_PORT="6379" rm -fr *.log -# UNIT TEST +function install_redis() { + ## Install redis if not already installed + if ! command -v redis-server >/dev/null 2>&1; then + apt update -y && apt install -y redis + fi +} + +function start_redis() { + # Run redis server in background + redis-server --daemonize yes --port "${TRITON_REDIS_PORT}" + + # Check redis server is running + REDIS_PING_RESPONSE=$(redis-cli -h ${TRITON_REDIS_HOST} -p ${TRITON_REDIS_PORT} ping) + if [ "${REDIS_PING_RESPONSE}" == "PONG" ]; then + echo "Redis successfully started in background" + else + echo -e "\n***\n*** Failed: Redis server did not start successfully\n***" + RET=1 + fi +} + +function stop_redis() { + echo "Stopping Redis server..." 
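+    # Failures are ignored ("|| true") so cleanup still succeeds when no
+    # server is actually running.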
+ redis-cli -h "${TRITON_REDIS_HOST}" -p "${TRITON_REDIS_PORT}" shutdown > /dev/null 2>&1 || true + echo "Redis server shutdown" +} + +# UNIT TESTS set +e -export CUDA_VISIBLE_DEVICES=0 + +## Unit tests currently run for both Local and Redis cache implementaitons +## by default. However, we could break out the unit tests for each +## into separate runs gtest filters if needed in the future: +## - `${UNIT_TEST} --gtest_filter=*Local*` +## - `${UNIT_TEST} --gtest_filter=*Redis*` +install_redis +# Stop any existing redis server first for good measure +stop_redis +start_redis LD_LIBRARY_PATH=/opt/tritonserver/lib:$LD_LIBRARY_PATH $UNIT_TEST >>$TEST_LOG 2>&1 if [ $? -ne 0 ]; then cat $TEST_LOG @@ -48,10 +90,33 @@ function check_server_success_and_kill { if [ "${SERVER_PID}" == "0" ]; then echo -e "\n***\n*** Failed to start ${SERVER}\n***" cat ${SERVER_LOG} - exit 1 + RET=1 + else + kill ${SERVER_PID} + wait ${SERVER_PID} + fi +} + +function check_server_expected_failure { + EXPECTED_MESSAGE="${1}" + if [ "${SERVER_PID}" != "0" ]; then + echo -e "\n***\n*** Failed: ${SERVER} started successfully when it was expected to fail\n***" + cat ${SERVER_LOG} + RET=1 + + kill ${SERVER_PID} + wait ${SERVER_PID} + else + # Check that server fails with the correct error message + set +e + grep -i "${EXPECTED_MESSAGE}" ${SERVER_LOG} + if [ $? -ne 0 ]; then + echo -e "\n***\n*** Failed: Expected [${EXPECTED_MESSAGE}] error message in output\n***" + cat $SERVER_LOG + RET=1 + fi + set -e fi - kill $SERVER_PID - wait $SERVER_PID } MODEL_DIR="${PWD}/models" @@ -102,46 +167,35 @@ check_server_success_and_kill # Test that specifying multiple cache types is not supported and should fail SERVER_ARGS="--model-repository=${MODEL_DIR} --cache-config=local,size=8192 --cache-config=redis,key=value ${EXTRA_ARGS}" run_server -if [ "$SERVER_PID" != "0" ]; then - echo -e "\n***\n*** Failed: $SERVER started successfully when it was expected to fail\n***" - cat $SERVER_LOG - RET=1 - - kill $SERVER_PID - wait $SERVER_PID -else - # Check that server fails with the correct error message - set +e - grep -i "multiple cache configurations" ${SERVER_LOG} - if [ $? -ne 0 ]; then - echo -e "\n***\n*** Failed: Expected multiple cache configuration error message in output\n***" - cat $SERVER_LOG - RET=1 - fi - set -e -fi +check_server_expected_failure "multiple cache configurations" # Test that specifying both config styles is incompatible and should fail SERVER_ARGS="--model-repository=${MODEL_DIR} --response-cache-byte-size=12345 --cache-config=local,size=67890 ${EXTRA_ARGS}" run_server -if [ "$SERVER_PID" != "0" ]; then - echo -e "\n***\n*** Failed: $SERVER started successfully when it was expected to fail\n***" - cat $SERVER_LOG - RET=1 +check_server_expected_failure "incompatible flags" - kill $SERVER_PID - wait $SERVER_PID -else - # Check that server fails with the correct error message - set +e - grep -i "incompatible flags" ${SERVER_LOG} - if [ $? 
-ne 0 ]; then - echo -e "\n***\n*** Failed: Expected incompatible cache config flags error message in output\n***" - cat $SERVER_LOG - RET=1 - fi - set -e -fi +# Test simple redis cache config succeeds +SERVER_ARGS="--model-repository=${MODEL_DIR} --cache-config=redis,host=${TRITON_REDIS_HOST} --cache-config redis,port=${TRITON_REDIS_PORT} ${EXTRA_ARGS}" +run_server +check_server_success_and_kill + +# Test triton fails to initialize if it can't connect to redis cache +SERVER_ARGS="--model-repository=${MODEL_DIR} --cache-config=redis,host=localhost --cache-config=redis,port=nonexistent ${EXTRA_ARGS}" +run_server +check_server_expected_failure "Failed to connect to Redis: Connection refused" + +# Test triton fails to initialize if it can't resolve host for redis cache +SERVER_ARGS="--model-repository=${MODEL_DIR} --cache-config=redis,host=nonexistent --cache-config=redis,port=nonexistent ${EXTRA_ARGS}" +run_server +check_server_expected_failure "Failed to connect to Redis: Temporary failure in name resolution" + +# Test triton fails to initialize if minimum required args (host & port) not all provided +SERVER_ARGS="--model-repository=${MODEL_DIR} --cache-config=redis,port=${TRITON_REDIS_HOST} ${EXTRA_ARGS}" +run_server +check_server_expected_failure "Must at a minimum specify" + +# Clean up redis server before exiting test +stop_redis if [ $RET -eq 0 ]; then echo -e "\n***\n*** Test Passed\n***" diff --git a/qa/common/util.sh b/qa/common/util.sh index 30e12d8ec5..c8535249fd 100755 --- a/qa/common/util.sh +++ b/qa/common/util.sh @@ -66,7 +66,7 @@ function wait_for_server_ready() { local wait_secs=$wait_time_secs until test $wait_secs -eq 0 ; do - if ! kill -0 $spid; then + if ! kill -0 $spid > /dev/null 2>&1; then echo "=== Server not running." WAIT_RET=1 return @@ -147,13 +147,13 @@ function wait_for_model_stable() { } function gdb_helper () { - if ! command -v gdb; then + if ! 
command -v gdb > /dev/null 2>&1; then echo "=== WARNING: gdb not installed" return fi ### Server Hang ### - if kill -0 ${SERVER_PID}; then + if kill -0 ${SERVER_PID} > /dev/null 2>&1; then # If server process is still alive, try to get backtrace and core dump from it GDB_LOG="gdb_bt.${SERVER_PID}.log" echo -e "=== WARNING: SERVER HANG DETECTED, DUMPING GDB BACKTRACE TO [${PWD}/${GDB_LOG}] ===" @@ -166,7 +166,7 @@ function gdb_helper () { ### Server Segfaulted ### # If there are any core dumps locally from a segfault, load them and get a backtrace - for corefile in $(ls core.*); do + for corefile in $(ls core.* > /dev/null 2>&1); do GDB_LOG="${corefile}.log" echo -e "=== WARNING: SEGFAULT DETECTED, DUMPING GDB BACKTRACE TO [${PWD}/${GDB_LOG}] ===" gdb -batch ${SERVER} ${corefile} -ex "thread apply all bt" | tee "${corefile}.log" || true; @@ -204,7 +204,7 @@ function run_server () { gdb_helper || true # Cleanup - kill $SERVER_PID || true + kill $SERVER_PID > /dev/null 2>&1 || true SERVER_PID=0 fi } From c1b43a2fb791cc236db54e806f71ea7e493410be Mon Sep 17 00:00:00 2001 From: Ryan McCormick Date: Wed, 7 Jun 2023 20:14:42 -0700 Subject: [PATCH 4/6] Review feedback: remove 'the new' wording around cache API --- docs/user_guide/response_cache.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user_guide/response_cache.md b/docs/user_guide/response_cache.md index 41bcd7d34c..9f9c3d9b05 100644 --- a/docs/user_guide/response_cache.md +++ b/docs/user_guide/response_cache.md @@ -153,7 +153,7 @@ For Triton-specific `redis` cache implementation details/configuration, see the #### Custom Cache -With the new the TRITONCACHE API interface, it is now possible for +With the TRITONCACHE API interface, it is now possible for users to implement their own cache to suit any use-case specific needs. To see the required interface that must be implemented by a cache developer, see the From b6c877947bffe9c01bc2181d44bc5cbc45b65795 Mon Sep 17 00:00:00 2001 From: Ryan McCormick Date: Thu, 8 Jun 2023 11:58:16 -0700 Subject: [PATCH 5/6] Add authenticated redis tests --- qa/L0_response_cache/test.sh | 52 +++++++++++++++++++++++++++++++++--- 1 file changed, 49 insertions(+), 3 deletions(-) diff --git a/qa/L0_response_cache/test.sh b/qa/L0_response_cache/test.sh index e2cc7e9c60..bd067fb6b4 100755 --- a/qa/L0_response_cache/test.sh +++ b/qa/L0_response_cache/test.sh @@ -61,10 +61,25 @@ function start_redis() { function stop_redis() { echo "Stopping Redis server..." - redis-cli -h "${TRITON_REDIS_HOST}" -p "${TRITON_REDIS_PORT}" shutdown > /dev/null 2>&1 || true + redis-cli -h "${TRITON_REDIS_HOST}" -p "${TRITON_REDIS_PORT}" shutdown || true echo "Redis server shutdown" } +function set_redis_auth() { + # NOTE: Per-user auth [Access Control List (ACL)] is only supported in + # Redis >= 6.0 and is more comprehensive in what can be configured. + # For simplicity and wider range of Redis version support, use + # server-wide password via "requirepass" for now. 
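+    # Setting "requirepass" via CONFIG SET takes effect on the running server
+    # immediately (no redis.conf edit or restart needed); later redis-cli calls
+    # authenticate implicitly through the REDISCLI_AUTH variable exported below.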
+ redis-cli -h "${TRITON_REDIS_HOST}" -p "${TRITON_REDIS_PORT}" config set requirepass "${REDIS_PW}" + export REDISCLI_AUTH="${REDIS_PW}" +} + +function unset_redis_auth() { + # Authenticate implicitly via REDISCLI_AUTH env var, then unset password/var + redis-cli -h "${TRITON_REDIS_HOST}" -p "${TRITON_REDIS_PORT}" config set requirepass "" + unset REDISCLI_AUTH +} + # UNIT TESTS set +e @@ -174,8 +189,11 @@ SERVER_ARGS="--model-repository=${MODEL_DIR} --response-cache-byte-size=12345 -- run_server check_server_expected_failure "incompatible flags" +## Redis Cache CLI tests +REDIS_ENDPOINT="--cache-config redis,host=${TRITON_REDIS_HOST} --cache-config redis,port=${TRITON_REDIS_PORT}" + # Test simple redis cache config succeeds -SERVER_ARGS="--model-repository=${MODEL_DIR} --cache-config=redis,host=${TRITON_REDIS_HOST} --cache-config redis,port=${TRITON_REDIS_PORT} ${EXTRA_ARGS}" +SERVER_ARGS="--model-repository=${MODEL_DIR} ${REDIS_ENDPOINT} ${EXTRA_ARGS}" run_server check_server_success_and_kill @@ -187,14 +205,42 @@ check_server_expected_failure "Failed to connect to Redis: Connection refused" # Test triton fails to initialize if it can't resolve host for redis cache SERVER_ARGS="--model-repository=${MODEL_DIR} --cache-config=redis,host=nonexistent --cache-config=redis,port=nonexistent ${EXTRA_ARGS}" run_server -check_server_expected_failure "Failed to connect to Redis: Temporary failure in name resolution" +# Either of these errors can be returned for bad hostname, so check for either. +MSG1="Temporary failure in name resolution" +MSG2="Name or service not known" +check_server_expected_failure "${MSG1}\|${MSG2}" # Test triton fails to initialize if minimum required args (host & port) not all provided SERVER_ARGS="--model-repository=${MODEL_DIR} --cache-config=redis,port=${TRITON_REDIS_HOST} ${EXTRA_ARGS}" run_server check_server_expected_failure "Must at a minimum specify" +## Redis Authentication tests + +# Automatically provide auth via REDISCLI_AUTH env var when set: https://redis.io/docs/ui/cli/ +REDIS_PW="redis123!" 
+set_redis_auth + +# Test simple redis authentication succeeds with correct credentials +REDIS_CACHE_AUTH="--cache-config redis,password=${REDIS_PW}" +SERVER_ARGS="--model-repository=${MODEL_DIR} ${REDIS_ENDPOINT} ${REDIS_CACHE_AUTH} ${EXTRA_ARGS}" +run_server +check_server_success_and_kill + +# Test simple redis authentication fails with wrong credentials +REDIS_CACHE_AUTH="--cache-config redis,password=wrong" +SERVER_ARGS="--model-repository=${MODEL_DIR} ${REDIS_ENDPOINT} ${REDIS_CACHE_AUTH} ${EXTRA_ARGS}" +run_server +check_server_expected_failure "WRONGPASS" + + +# Test simple redis authentication fails with no credentials +SERVER_ARGS="--model-repository=${MODEL_DIR} ${REDIS_ENDPOINT} ${EXTRA_ARGS}" +run_server +check_server_expected_failure "NOAUTH Authentication required" + # Clean up redis server before exiting test +unset_redis_auth stop_redis if [ $RET -eq 0 ]; then From e6ee5a52067bfe158bb2a91c10f648c168f94469 Mon Sep 17 00:00:00 2001 From: Ryan McCormick Date: Thu, 8 Jun 2023 12:32:26 -0700 Subject: [PATCH 6/6] Remove 23.03 note for list of caches to avoid confusion since Redis was added later --- docs/user_guide/response_cache.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/user_guide/response_cache.md b/docs/user_guide/response_cache.md index 9f9c3d9b05..b526a3c84e 100644 --- a/docs/user_guide/response_cache.md +++ b/docs/user_guide/response_cache.md @@ -101,7 +101,8 @@ that are used to communicate with a cache implementation of the user's choice. A cache implementation is a shared library that implements the required TRITONCACHE APIs and is dynamically loaded on server startup, if enabled. -For tags `>=23.03`, + +Triton's most recent [tritonserver release containers](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tritonserver) come with the following cache implementations out of the box: - [local](https://github.com/triton-inference-server/local_cache): `/opt/tritonserver/caches/local/libtritoncache_local.so`