diff --git a/Dockerfile.sdk b/Dockerfile.sdk
index 496185816a..fd0e93500f 100644
--- a/Dockerfile.sdk
+++ b/Dockerfile.sdk
@@ -29,7 +29,7 @@
 #
 # Base image on the minimum Triton container
-ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:23.10-py3-min
+ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:23.11-py3-min
 
 ARG TRITON_CLIENT_REPO_SUBDIR=clientrepo
 ARG TRITON_COMMON_REPO_TAG=main
diff --git a/Dockerfile.win10.min b/Dockerfile.win10.min
index 8dba7bf8bc..8d2d43e3dd 100644
--- a/Dockerfile.win10.min
+++ b/Dockerfile.win10.min
@@ -83,7 +83,13 @@ ARG BUILD_TOOLS_SOURCE=https://aka.ms/vs/17/release/vs_buildtools.exe
 ADD ${BUILD_TOOLS_SOURCE} vs_buildtools.exe
 # Install Build Tools with the Microsoft.VisualStudio.Workload.VCTools workload, including recommended.
 ARG VS_INSTALL_PATH_WP="C:\BuildTools"
-RUN vs_buildtools.exe --quiet --wait --norestart --nocache install --installPath %VS_INSTALL_PATH_WP% --channelUri "C:\tmp\VisualStudio.chman" --installChannelUri "C:\tmp\VisualStudio.chman" --add Microsoft.VisualStudio.Workload.VCTools --includeRecommended --locale "En-us"
+RUN vs_buildtools.exe --quiet --wait --norestart --nocache install \
+    --installPath %VS_INSTALL_PATH_WP% \
+    --channelUri "C:\tmp\VisualStudio.chman" \
+    --installChannelUri "C:\tmp\VisualStudio.chman" \
+    --add Microsoft.VisualStudio.Workload.VCTools \
+    --includeRecommended \
+    --locale "En-us"
 
 LABEL BUILDTOOLS_VERSION=${BUILDTOOLS_VERSION}
@@ -97,7 +103,17 @@ RUN git clone --single-branch --depth=1 -b %VCPGK_VERSION% https://github.com/mi
 WORKDIR /vcpkg
 RUN bootstrap-vcpkg.bat
 RUN vcpkg.exe update
-RUN vcpkg.exe install openssl:x64-windows openssl-windows:x64-windows rapidjson:x64-windows re2:x64-windows boost-filesystem:x64-windows boost-interprocess:x64-windows boost-stacktrace:x64-windows zlib:x64-windows pthread:x64-windows b64:x64-windows
+RUN vcpkg.exe install \
+    b64:x64-windows \
+    boost-filesystem:x64-windows \
+    boost-interprocess:x64-windows \
+    boost-stacktrace:x64-windows \
+    openssl-windows:x64-windows \
+    openssl:x64-windows \
+    pthread:x64-windows \
+    rapidjson:x64-windows \
+    re2:x64-windows \
+    zlib:x64-windows
 RUN vcpkg.exe integrate install
 
 LABEL VCPGK_VERSION=${VCPGK_VERSION}
@@ -108,8 +124,8 @@ WORKDIR /
 # Installing CUDA
 #
 ARG CUDA_MAJOR=12
-ARG CUDA_MINOR=2
-ARG CUDA_PATCH=1
+ARG CUDA_MINOR=3
+ARG CUDA_PATCH=0
 ARG CUDA_VERSION=${CUDA_MAJOR}.${CUDA_MINOR}.${CUDA_PATCH}
 ARG CUDA_PACKAGES="nvcc_${CUDA_MAJOR}.${CUDA_MINOR} \
                    cudart_${CUDA_MAJOR}.${CUDA_MINOR} \
@@ -157,7 +173,7 @@ LABEL TENSORRT_VERSION="${TENSORRT_VERSION}"
 #
 # Installing cuDNN
 #
-ARG CUDNN_VERSION=8.9.5.27
+ARG CUDNN_VERSION=8.9.6.50
 ARG CUDNN_ZIP=cudnn-windows-x86_64-${CUDNN_VERSION}_cuda12-archive.zip
 ARG CUDNN_SOURCE=${CUDNN_ZIP}
diff --git a/README.md b/README.md
index 72c671a253..b9cf911424 100644
--- a/README.md
+++ b/README.md
@@ -32,8 +32,8 @@
 **LATEST RELEASE: You are currently on the main branch which tracks
 under-development progress towards the next release. The current release is
-version [2.38.0](https://github.com/triton-inference-server/server/tree/r23.09)
-and corresponds to the 23.09 container release on
+version [2.40.0](https://github.com/triton-inference-server/server/tree/r23.11)
+and corresponds to the 23.11 container release on
 [NVIDIA GPU Cloud (NGC)](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tritonserver).**
 
 ----
@@ -93,16 +93,16 @@ Inference Server with the
 ```bash
 # Step 1: Create the example model repository
-git clone -b r23.10 https://github.com/triton-inference-server/server.git
+git clone -b r23.11 https://github.com/triton-inference-server/server.git
 cd server/docs/examples
 ./fetch_models.sh
 
 # Step 2: Launch triton from the NGC Triton container
-docker run --gpus=1 --rm --net=host -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:23.10-py3 tritonserver --model-repository=/models
+docker run --gpus=1 --rm --net=host -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:23.11-py3 tritonserver --model-repository=/models
 
 # Step 3: Sending an Inference Request
 # In a separate console, launch the image_client example from the NGC Triton SDK container
-docker run -it --rm --net=host nvcr.io/nvidia/tritonserver:23.10-py3-sdk
+docker run -it --rm --net=host nvcr.io/nvidia/tritonserver:23.11-py3-sdk
 /workspace/install/bin/image_client -m densenet_onnx -c 3 -s INCEPTION /workspace/images/mug.jpg
 
 # Inference should return the following
diff --git a/build.py b/build.py
index e364827c6b..5608e59685 100755
--- a/build.py
+++ b/build.py
@@ -72,7 +72,7 @@
 TRITON_VERSION_MAP = {
     "2.41.0dev": (
         "23.12dev",  # triton container
-        "23.10",  # upstream container
+        "23.11",  # upstream container
         "1.16.3",  # ORT
         "2023.0.0",  # ORT OpenVINO
         "2023.0.0",  # Standalone OpenVINO
@@ -1389,10 +1389,10 @@ def add_cpu_libs_to_linux_dockerfile(backends, target_machine):
 COPY --from=min_container /usr/local/cuda/lib64/stubs/libcublasLt.so /usr/local/cuda/lib64/stubs/libcublasLt.so.11
 
 RUN mkdir -p /usr/local/cuda/targets/{cuda_arch}-linux/lib
-COPY --from=min_container /usr/local/cuda-12.2/targets/{cuda_arch}-linux/lib/libcudart.so.12 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.
-COPY --from=min_container /usr/local/cuda-12.2/targets/{cuda_arch}-linux/lib/libcupti.so.12 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.
-COPY --from=min_container /usr/local/cuda-12.2/targets/{cuda_arch}-linux/lib/libnvToolsExt.so.1 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.
-COPY --from=min_container /usr/local/cuda-12.2/targets/{cuda_arch}-linux/lib/libnvJitLink.so.12 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.
+COPY --from=min_container /usr/local/cuda/lib64/libcudart.so.12 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.
+COPY --from=min_container /usr/local/cuda/lib64/libcupti.so.12 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.
+COPY --from=min_container /usr/local/cuda/lib64/libnvToolsExt.so.1 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.
+COPY --from=min_container /usr/local/cuda/lib64/libnvJitLink.so.12 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.
 RUN mkdir -p /opt/hpcx/ucc/lib/ /opt/hpcx/ucx/lib/
 COPY --from=min_container /opt/hpcx/ucc/lib/libucc.so.1 /opt/hpcx/ucc/lib/libucc.so.1
diff --git a/deploy/aws/values.yaml b/deploy/aws/values.yaml
index c009f423d0..9e511742bc 100644
--- a/deploy/aws/values.yaml
+++ b/deploy/aws/values.yaml
@@ -27,7 +27,7 @@
 replicaCount: 1
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:23.10-py3
+  imageName: nvcr.io/nvidia/tritonserver:23.11-py3
   pullPolicy: IfNotPresent
   modelRepositoryPath: s3://triton-inference-server-repository/model_repository
   numGpus: 1
diff --git a/deploy/fleetcommand/Chart.yaml b/deploy/fleetcommand/Chart.yaml
index 1d6706a7e8..959d69794a 100644
--- a/deploy/fleetcommand/Chart.yaml
+++ b/deploy/fleetcommand/Chart.yaml
@@ -26,7 +26,7 @@
 apiVersion: v1
 # appVersion is the Triton version; update when changing release
-appVersion: "2.39.0"
+appVersion: "2.40.0"
 description: Triton Inference Server (Fleet Command)
 name: triton-inference-server
 # version is the Chart version; update when changing anything in the chart
diff --git a/deploy/fleetcommand/values.yaml b/deploy/fleetcommand/values.yaml
index 44f4ebb81c..df0071136e 100644
--- a/deploy/fleetcommand/values.yaml
+++ b/deploy/fleetcommand/values.yaml
@@ -27,7 +27,7 @@
 replicaCount: 1
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:23.10-py3
+  imageName: nvcr.io/nvidia/tritonserver:23.11-py3
   pullPolicy: IfNotPresent
   numGpus: 1
   serverCommand: tritonserver
@@ -46,13 +46,13 @@ image:
   # Model Control Mode (Optional, default: none)
   #
   # To set model control mode, uncomment and configure below
-  # See https://github.com/triton-inference-server/server/blob/r23.10/docs/model_management.md
+  # See https://github.com/triton-inference-server/server/blob/r23.11/docs/model_management.md
   # for more details
   #- --model-control-mode=explicit|poll|none
   #
   # Additional server args
   #
-  # see https://github.com/triton-inference-server/server/blob/r23.10/README.md
+  # see https://github.com/triton-inference-server/server/blob/r23.11/README.md
   # for more details
 
 service:
diff --git a/deploy/gcp/values.yaml b/deploy/gcp/values.yaml
index b658cca654..ed01d80d52 100644
--- a/deploy/gcp/values.yaml
+++ b/deploy/gcp/values.yaml
@@ -27,7 +27,7 @@
 replicaCount: 1
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:23.10-py3
+  imageName: nvcr.io/nvidia/tritonserver:23.11-py3
   pullPolicy: IfNotPresent
   modelRepositoryPath: gs://triton-inference-server-repository/model_repository
   numGpus: 1
diff --git a/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml b/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml
index 0f35495a67..f15abbbbc5 100644
--- a/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml
+++ b/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml
@@ -33,7 +33,7 @@ metadata:
   namespace: default
 spec:
   containers:
-  - image: nvcr.io/nvidia/tritonserver:23.10-py3-sdk
+  - image: nvcr.io/nvidia/tritonserver:23.11-py3-sdk
     imagePullPolicy: Always
     name: nv-triton-client
     securityContext:
diff --git a/deploy/gke-marketplace-app/server-deployer/build_and_push.sh b/deploy/gke-marketplace-app/server-deployer/build_and_push.sh
index 4c626c310f..2f12104749 100755
--- a/deploy/gke-marketplace-app/server-deployer/build_and_push.sh
+++ b/deploy/gke-marketplace-app/server-deployer/build_and_push.sh
@@ -27,9 +27,9 @@
 export REGISTRY=gcr.io/$(gcloud config get-value project | tr ':' '/')
 export APP_NAME=tritonserver
-export MAJOR_VERSION=2.39
-export MINOR_VERSION=2.39.0
-export NGC_VERSION=23.10-py3
+export MAJOR_VERSION=2.40
+export MINOR_VERSION=2.40.0
+export NGC_VERSION=23.11-py3
 
 docker pull nvcr.io/nvidia/$APP_NAME:$NGC_VERSION
diff --git a/deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml b/deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml
index 59aaddb618..a7c8da41b3 100644
--- a/deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml
+++ b/deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml
@@ -25,7 +25,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 apiVersion: v1
-appVersion: "2.39"
+appVersion: "2.40"
 description: Triton Inference Server
 name: triton-inference-server
-version: 2.39.0
+version: 2.40.0
diff --git a/deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml b/deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml
index 7ab73a2ce4..f168c13c86 100644
--- a/deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml
+++ b/deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml
@@ -32,13 +32,13 @@ tritonProtocol: HTTP
 # HPA GPU utilization autoscaling target
 HPATargetAverageValue: 85
 modelRepositoryPath: gs://triton_sample_models/23_09
-publishedVersion: '2.39.0'
+publishedVersion: '2.40.0'
 gcpMarketplace: true
 
 image:
   registry: gcr.io
   repository: nvidia-ngc-public/tritonserver
-  tag: 23.10-py3
+  tag: 23.11-py3
   pullPolicy: IfNotPresent
   # modify the model repository here to match your GCP storage bucket
   numGpus: 1
diff --git a/deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml b/deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml
index 117a87d477..df57ba1f30 100644
--- a/deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml
+++ b/deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml
@@ -27,7 +27,7 @@
 x-google-marketplace:
   schemaVersion: v2
   applicationApiVersion: v1beta1
-  publishedVersion: '2.39.0'
+  publishedVersion: '2.40.0'
   publishedVersionMetadata:
     releaseNote: >-
       Initial release.
diff --git a/deploy/gke-marketplace-app/server-deployer/schema.yaml b/deploy/gke-marketplace-app/server-deployer/schema.yaml
index 148c78d894..25fe6515cd 100644
--- a/deploy/gke-marketplace-app/server-deployer/schema.yaml
+++ b/deploy/gke-marketplace-app/server-deployer/schema.yaml
@@ -27,7 +27,7 @@
 x-google-marketplace:
   schemaVersion: v2
   applicationApiVersion: v1beta1
-  publishedVersion: '2.39.0'
+  publishedVersion: '2.40.0'
   publishedVersionMetadata:
     releaseNote: >-
       Initial release.
diff --git a/deploy/gke-marketplace-app/trt-engine/README.md b/deploy/gke-marketplace-app/trt-engine/README.md
index a85e88cc48..6e1d28e893 100644
--- a/deploy/gke-marketplace-app/trt-engine/README.md
+++ b/deploy/gke-marketplace-app/trt-engine/README.md
@@ -33,7 +33,7 @@
 ```
 docker run --gpus all -it --network host \
   --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 \
-  -v ~:/scripts nvcr.io/nvidia/tensorrt:23.10-py3
+  -v ~:/scripts nvcr.io/nvidia/tensorrt:23.11-py3
 
 pip install onnx six torch tf2onnx tensorflow
diff --git a/deploy/k8s-onprem/values.yaml b/deploy/k8s-onprem/values.yaml
index 8ad9511390..3e3066bbf6 100644
--- a/deploy/k8s-onprem/values.yaml
+++ b/deploy/k8s-onprem/values.yaml
@@ -29,7 +29,7 @@ tags:
   loadBalancing: true
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:23.10-py3
+  imageName: nvcr.io/nvidia/tritonserver:23.11-py3
   pullPolicy: IfNotPresent
   modelRepositoryServer: < Replace with the IP Address of your file server >
   modelRepositoryPath: /srv/models
diff --git a/docs/customization_guide/build.md b/docs/customization_guide/build.md
index a314ce1d4c..40f8f00c76 100644
--- a/docs/customization_guide/build.md
+++ b/docs/customization_guide/build.md
@@ -173,7 +173,7 @@ $ ./build.py ... --repo-tag=common:<container tag> --repo-tag=core:<container tag>
 If you are building on a release branch then `<container tag>` will
 default to the branch name. For example, if you are building on the
-r23.10 branch, `<container tag>` will default to r23.10. If you are
+r23.11 branch, `<container tag>` will default to r23.11. If you are
 building on any other branch (including the *main* branch) then
 `<container tag>` will default to "main". Therefore, you typically do
 not need to provide `<container tag>` at all (nor the preceding
@@ -334,8 +334,8 @@ python build.py --cmake-dir=/build --build-dir=/tmp/citritonbuild
 If you are building on *main* branch then '<container tag>' will
 default to "main". If you are building on a release branch then
 '<container tag>' will default to the branch name. For example, if you
-are building on the r23.10 branch, '<container tag>' will default to
-r23.10. Therefore, you typically do not need to provide '<container
+are building on the r23.11 branch, '<container tag>' will default to
+r23.11. Therefore, you typically do not need to provide '<container
 tag>' at all (nor the preceding colon). You can use a different
 '<container tag>' for a component to instead use the corresponding
 branch/tag in the build. For example, if you have a branch called
diff --git a/docs/customization_guide/compose.md b/docs/customization_guide/compose.md
index 89f9d65aba..5c66e8933b 100644
--- a/docs/customization_guide/compose.md
+++ b/docs/customization_guide/compose.md
@@ -44,8 +44,8 @@ from source to get more exact customization.
 The `compose.py` script can be found in the
 [server repository](https://github.com/triton-inference-server/server).
 Simply clone the repository and run `compose.py` to create a custom container.
 Note: Created container version will depend on the branch that was cloned.
-For example branch [r23.10](https://github.com/triton-inference-server/server/tree/r23.10)
-should be used to create a image based on the NGC 23.10 Triton release.
+For example branch [r23.11](https://github.com/triton-inference-server/server/tree/r23.11)
+should be used to create a image based on the NGC 23.11 Triton release.
 `compose.py` provides `--backend`, `--repoagent` options that allow you to
 specify which backends and repository agents to include in the custom image.
@@ -76,19 +76,19 @@ For example, running
 ```
 python3 compose.py --backend tensorflow1 --repoagent checksum
 ```
-on branch [r23.10](https://github.com/triton-inference-server/server/tree/r23.10) pulls:
-- `min` container `nvcr.io/nvidia/tritonserver:23.10-py3-min`
-- `full` container `nvcr.io/nvidia/tritonserver:23.10-py3`
+on branch [r23.11](https://github.com/triton-inference-server/server/tree/r23.11) pulls:
+- `min` container `nvcr.io/nvidia/tritonserver:23.11-py3-min`
+- `full` container `nvcr.io/nvidia/tritonserver:23.11-py3`
 
 Alternatively, users can specify the version of Triton container to pull from any branch by either:
 1. Adding flag `--container-version <container version>` to branch
 ```
-python3 compose.py --backend tensorflow1 --repoagent checksum --container-version 23.10
+python3 compose.py --backend tensorflow1 --repoagent checksum --container-version 23.11
 ```
 2. Specifying `--image min, --image full,`. The user is responsible for specifying compatible `min` and `full` containers.
 ```
-python3 compose.py --backend tensorflow1 --repoagent checksum --image min,nvcr.io/nvidia/tritonserver:23.10-py3-min --image full,nvcr.io/nvidia/tritonserver:23.10-py3
+python3 compose.py --backend tensorflow1 --repoagent checksum --image min,nvcr.io/nvidia/tritonserver:23.11-py3-min --image full,nvcr.io/nvidia/tritonserver:23.11-py3
 ```
 Method 1 and 2 will result in the same composed container. Furthermore,
 `--image` flag overrides the `--container-version` flag when both are specified.
diff --git a/docs/customization_guide/test.md b/docs/customization_guide/test.md
index 42a96a9d36..39b517a50d 100644
--- a/docs/customization_guide/test.md
+++ b/docs/customization_guide/test.md
@@ -49,7 +49,7 @@ $ ./gen_qa_custom_ops
 ```
 
 This will create multiple model repositories in /tmp/<version>/qa_*
-(for example /tmp/23.10/qa_model_repository). The TensorRT models
+(for example /tmp/23.11/qa_model_repository). The TensorRT models
 will be created for the GPU on the system that CUDA considers device
 0 (zero). If you have multiple GPUs on your system see the
 documentation in the scripts for how to target a specific GPU.
diff --git a/docs/user_guide/custom_operations.md b/docs/user_guide/custom_operations.md
index 31a4093431..daecf2e209 100644
--- a/docs/user_guide/custom_operations.md
+++ b/docs/user_guide/custom_operations.md
@@ -64,7 +64,7 @@ simple way to ensure you are using the correct version of TensorRT is
 to use the [NGC TensorRT
 container](https://ngc.nvidia.com/catalog/containers/nvidia:tensorrt)
 corresponding to the Triton container. For example, if you are using
-the 23.10 version of Triton, use the 23.10 version of the TensorRT
+the 23.11 version of Triton, use the 23.11 version of the TensorRT
 container.
 
 ## TensorFlow
@@ -123,7 +123,7 @@ simple way to ensure you are using the correct version of TensorFlow
 is to use the [NGC TensorFlow
 container](https://ngc.nvidia.com/catalog/containers/nvidia:tensorflow)
 corresponding to the Triton container. For example, if you are using
-the 23.10 version of Triton, use the 23.10 version of the TensorFlow
+the 23.11 version of Triton, use the 23.11 version of the TensorFlow
 container.
 
 ## PyTorch
@@ -167,7 +167,7 @@ simple way to ensure you are using the correct version of PyTorch is
 to use the [NGC PyTorch
 container](https://ngc.nvidia.com/catalog/containers/nvidia:pytorch)
 corresponding to the Triton container. For example, if you are using
-the 23.10 version of Triton, use the 23.10 version of the PyTorch
+the 23.11 version of Triton, use the 23.11 version of the PyTorch
 container.
 
 ## ONNX
diff --git a/docs/user_guide/performance_tuning.md b/docs/user_guide/performance_tuning.md
index d2e2deaa34..e28789a2d3 100644
--- a/docs/user_guide/performance_tuning.md
+++ b/docs/user_guide/performance_tuning.md
@@ -235,7 +235,7 @@ with a `tritonserver` binary.
 
 ```bash
 # Start server container
-docker run -ti --rm --gpus=all --network=host -v $PWD:/mnt --name triton-server nvcr.io/nvidia/tritonserver:23.10-py3
+docker run -ti --rm --gpus=all --network=host -v $PWD:/mnt --name triton-server nvcr.io/nvidia/tritonserver:23.11-py3
 
 # Start serving your models
 tritonserver --model-repository=/mnt/models
@@ -284,7 +284,7 @@ by setting the `-u` flag, such as `perf_analyzer -m densenet_onnx -u
 
 ```bash
 # Start the SDK container interactively
-docker run -ti --rm --gpus=all --network=host -v $PWD:/mnt --name triton-client nvcr.io/nvidia/tritonserver:23.10-py3-sdk
+docker run -ti --rm --gpus=all --network=host -v $PWD:/mnt --name triton-client nvcr.io/nvidia/tritonserver:23.11-py3-sdk
 
 # Benchmark model being served from step 3
 perf_analyzer -m densenet_onnx --concurrency-range 1:4
diff --git a/qa/L0_infer/test.sh b/qa/L0_infer/test.sh
index fff1d7c5e1..971039a9a9 100755
--- a/qa/L0_infer/test.sh
+++ b/qa/L0_infer/test.sh
@@ -38,6 +38,8 @@ if [ ! -z "$TEST_REPO_ARCH" ]; then
     REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
 fi
 
+ldconfig || true
+
 export CUDA_VISIBLE_DEVICES=0
 
 TEST_RESULT_FILE='test_results.txt'
diff --git a/qa/L0_sequence_batcher/test.sh b/qa/L0_sequence_batcher/test.sh
index 6aa2f06629..d91b433966 100755
--- a/qa/L0_sequence_batcher/test.sh
+++ b/qa/L0_sequence_batcher/test.sh
@@ -42,6 +42,8 @@ TEST_RESULT_FILE='test_results.txt'
 # Must run on a single device or else the TRITONSERVER_DELAY_SCHEDULER
 # can fail when the requests are distributed to multiple devices.
 
+ldconfig || true
+
 export CUDA_VISIBLE_DEVICES=0
 
 CLIENT_LOG="./client.log"
diff --git a/qa/common/gen_jetson_trt_models b/qa/common/gen_jetson_trt_models
index f33c11171d..6411a2ecb3 100755
--- a/qa/common/gen_jetson_trt_models
+++ b/qa/common/gen_jetson_trt_models
@@ -34,7 +34,7 @@
 # Make all generated files accessible outside of container
 umask 0000
 # Set the version of the models
-TRITON_VERSION=${TRITON_VERSION:=23.10}
+TRITON_VERSION=${TRITON_VERSION:=23.11}
 # Set the CUDA device to use
 CUDA_DEVICE=${RUNNER_ID:=0}
 # Set TensorRT image
diff --git a/qa/common/gen_qa_custom_ops b/qa/common/gen_qa_custom_ops
index 3779ca2d16..dd9a0c23e4 100755
--- a/qa/common/gen_qa_custom_ops
+++ b/qa/common/gen_qa_custom_ops
@@ -37,7 +37,7 @@
 ##
 ############################################################################
 
-TRITON_VERSION=${TRITON_VERSION:=23.10}
+TRITON_VERSION=${TRITON_VERSION:=23.11}
 NVIDIA_UPSTREAM_VERSION=${NVIDIA_UPSTREAM_VERSION:=$TRITON_VERSION}
 TENSORFLOW_IMAGE=${TENSORFLOW_IMAGE:=nvcr.io/nvidia/tensorflow:$NVIDIA_UPSTREAM_VERSION-tf2-py3}
 PYTORCH_IMAGE=${PYTORCH_IMAGE:=nvcr.io/nvidia/pytorch:$NVIDIA_UPSTREAM_VERSION-py3}
@@ -130,7 +130,7 @@ nvidia-smi -L || true
 nvidia-smi || true
 set -e
 python3 $SRCDIR/gen_qa_custom_ops_models.py --libtorch --models_dir=$DESTDIR
-cp /root/.cache/torch_extensions/py310_cu122/custom_modulo/custom_modulo.so $DESTDIR/libtorch_modulo/.
+cp /root/.cache/torch_extensions/py310_cu123/custom_modulo/custom_modulo.so $DESTDIR/libtorch_modulo/.
 chmod -R 777 $DESTDIR
 EOF
diff --git a/qa/common/gen_qa_model_repository b/qa/common/gen_qa_model_repository
index 009ce3d3e3..fa65f1afdc 100755
--- a/qa/common/gen_qa_model_repository
+++ b/qa/common/gen_qa_model_repository
@@ -48,7 +48,7 @@
 ##
 ############################################################################
 
-TRITON_VERSION=${TRITON_VERSION:=23.10}
+TRITON_VERSION=${TRITON_VERSION:=23.11}
 # ONNX. Use ONNX_OPSET 0 to use the default for ONNX version
 ONNX_VERSION=1.13.0