From 4c834f01591807a624de24022557340632d29701 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Moura?= <82108834+joaopcm1996@users.noreply.github.com> Date: Wed, 12 Oct 2022 16:53:44 +0100 Subject: [PATCH] Added sentence transformers example with TensorRT and Triton Ensemble (#3615) * Added sentence transformers example with TensorRT and Triton Ensemble * Notebook changes to pass CI build * Grammar fixes and installing torch for CI build * Installing torch to pass CI build Co-authored-by: atqy <95724753+atqy@users.noreply.github.com> --- .../README.md | 38 + .../ensemble_hf/bert-trt/config.pbtxt | 32 + .../examples/ensemble_hf/ensemble/1/README.md | 1 + .../ensemble_hf/ensemble/config.pbtxt | 70 ++ .../ensemble_hf/postprocess/1/model.py | 78 ++ .../ensemble_hf/postprocess/config.pbtxt | 26 + .../ensemble_hf/postprocess/requirements.txt | 1 + .../ensemble_hf/preprocess/1/model.py | 74 ++ .../ensemble_hf/preprocess/config.pbtxt | 28 + .../ensemble_hf/preprocess/requirements.txt | 1 + .../examples/triton_sentence_embeddings.ipynb | 1015 +++++++++++++++++ .../examples/workspace/generate_model_trt.sh | 14 + .../examples/workspace/onnx_exporter.py | 30 + .../images/triton-ensemble.png | Bin 0 -> 32690 bytes .../studio-image/README.md | 67 ++ .../studio-image/build_image.sh | 48 + .../studio-image/create_studio_image.sh | 54 + .../studio-image/image_tensorrt/Dockerfile | 5 + .../image_tensorrt/app-image-config.json | 16 + .../studio-image/studio-domain-config.json | 13 + .../studio-image/update_studio_domain.sh | 5 + .../studio-image/update_studio_image.sh | 49 + 22 files changed, 1665 insertions(+) create mode 100644 inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/README.md create mode 100644 inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/examples/ensemble_hf/bert-trt/config.pbtxt create mode 100644 inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/examples/ensemble_hf/ensemble/1/README.md 
create mode 100644 inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/examples/ensemble_hf/ensemble/config.pbtxt create mode 100644 inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/examples/ensemble_hf/postprocess/1/model.py create mode 100644 inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/examples/ensemble_hf/postprocess/config.pbtxt create mode 100644 inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/examples/ensemble_hf/postprocess/requirements.txt create mode 100644 inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/examples/ensemble_hf/preprocess/1/model.py create mode 100644 inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/examples/ensemble_hf/preprocess/config.pbtxt create mode 100644 inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/examples/ensemble_hf/preprocess/requirements.txt create mode 100644 inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/examples/triton_sentence_embeddings.ipynb create mode 100755 inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/examples/workspace/generate_model_trt.sh create mode 100644 inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/examples/workspace/onnx_exporter.py create mode 100644 inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/images/triton-ensemble.png create mode 100644 inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/studio-image/README.md create mode 100755 inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/studio-image/build_image.sh create mode 100755 inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/studio-image/create_studio_image.sh create mode 100644 inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/studio-image/image_tensorrt/Dockerfile create mode 100644 
inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/studio-image/image_tensorrt/app-image-config.json create mode 100644 inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/studio-image/studio-domain-config.json create mode 100755 inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/studio-image/update_studio_domain.sh create mode 100755 inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/studio-image/update_studio_image.sh diff --git a/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/README.md b/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/README.md new file mode 100644 index 0000000000..df1f8177c2 --- /dev/null +++ b/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/README.md @@ -0,0 +1,38 @@ +# NVIDIA Triton Inference Server on SageMaker - Hugging Face Sentence Transformers + +## Introduction + +[HuggingFace Sentence Transformers](https://huggingface.co/sentence-transformers) is a Machine Learning (ML) framework and set of pre-trained models to +extract embeddings from sentences, text, and images. The models in this group can also be used with the default methods exposed through the [Transformers](https://github.com/huggingface/transformers) library. + +[NVIDIA Triton Inference Server](https://github.com/triton-inference-server/server/) is a high-performance ML model server, which enables the deployment of ML models in an easy, scalable, and cost-effective way. It also exposes many easy-to-use optimization features to make the most of the underlying hardware, in particular NVIDIA GPUs. 
+ +In this example, we walk through how you can: +* Create an Amazon SageMaker Studio image based on the official [NVIDIA PyTorch](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch) image, which includes the necessary dependencies to optimize your model +* Optimize a pre-trained HuggingFace Sentence Transformers model with NVIDIA TensorRT to enable high-performance inference +* Create a Triton Model Ensemble, which will allow you to run in sequence a pre-processing step (input tokenization), model inference and post-processing, where sentence embeddings are computed from the raw token embeddings + +This example is meant to serve as a basis for use-cases in which you need to run your own code before and/or after your model, allowing you to optimize the bulk of the computation (the model) using tools such as TensorRT. + +Triton Model Ensemble + +#### ! Important: The example provided can also be tested using Amazon SageMaker Notebook Instances + +### Prerequisites + +1. An NVIDIA NGC account is required. Follow the instructions at https://docs.nvidia.com/ngc/ngc-catalog-user-guide/index.html#registering-activating-ngc-account + +## Step 1: Clone this repository + +## Step 2: Build Studio image + +In this example, we provide a [Dockerfile](./studio-image/image_tensorrt/Dockerfile) example to build a custom image for SageMaker Studio. + +To build the image, push it and make it available in your Amazon SageMaker Studio environment, edit [sagemaker-studio-config](./studio-image/studio-domain-config.json) by replacing `$DOMAIN_ID` with your Studio domain ID. + +We also provide automation scripts in order to [build and push](./studio-image/build_image.sh) your docker image to an ECR repository +and [create](./studio-image/create_studio_image.sh) or [update](./studio-image/update_studio_image.sh) an Amazon SageMaker Image. Please follow the instructions in the [README](./studio-image/README.md) for additional info on the usage of these scripts. 
+ +## Step 3: Compile model, create an Amazon SageMaker Real-Time Endpoint with NVIDIA Triton Inference Server + +Clone this repository into your Amazon SageMaker Studio environment and execute the cells in the [notebook](./examples/triton_sentence_embeddings.ipynb) \ No newline at end of file diff --git a/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/examples/ensemble_hf/bert-trt/config.pbtxt b/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/examples/ensemble_hf/bert-trt/config.pbtxt new file mode 100644 index 0000000000..3bf605dfc4 --- /dev/null +++ b/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/examples/ensemble_hf/bert-trt/config.pbtxt @@ -0,0 +1,32 @@ +name: "bert-trt" +platform: "tensorrt_plan" +max_batch_size: 16 +input [ + { + name: "token_ids" + data_type: TYPE_INT32 + dims: [128] + }, + { + name: "attn_mask" + data_type: TYPE_INT32 + dims: [128] + } +] +output [ + { + name: "output" + data_type: TYPE_FP32 + dims: [128, 384] + }, + { + name: "854" + data_type: TYPE_FP32 + dims: [384] + } +] +instance_group [ + { + kind: KIND_GPU + } + ] \ No newline at end of file diff --git a/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/examples/ensemble_hf/ensemble/1/README.md b/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/examples/ensemble_hf/ensemble/1/README.md new file mode 100644 index 0000000000..a8e639ff9d --- /dev/null +++ b/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/examples/ensemble_hf/ensemble/1/README.md @@ -0,0 +1 @@ +Do not delete me! 
\ No newline at end of file diff --git a/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/examples/ensemble_hf/ensemble/config.pbtxt b/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/examples/ensemble_hf/ensemble/config.pbtxt new file mode 100644 index 0000000000..aa36dd1ded --- /dev/null +++ b/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/examples/ensemble_hf/ensemble/config.pbtxt @@ -0,0 +1,70 @@ +name: "ensemble" +platform: "ensemble" +max_batch_size: 16 +input [ + { + name: "INPUT0" + data_type: TYPE_STRING + dims: [ 1 ] + } +] +output [ + { + name: "finaloutput" + data_type: TYPE_FP32 + dims: [384] + } +] +ensemble_scheduling { + step [ + { + model_name: "preprocess" + model_version: -1 + input_map { + key: "INPUT0" + value: "INPUT0" + } + output_map { + key: "OUTPUT0" + value: "token_ids" + } + output_map { + key: "OUTPUT1" + value: "attn_mask" + } + }, + { + model_name: "bert-trt" + model_version: -1 + input_map { + key: "token_ids" + value: "token_ids" + } + input_map { + key: "attn_mask" + value: "attn_mask" + } + output_map { + key: "output" + value: "output" + } + }, + { + model_name: "postprocess" + model_version: -1 + input_map { + key: "TOKEN_EMBEDS_POST" + value: "output" + } + input_map { + key: "ATTENTION_POST" + value: "attn_mask" + } + output_map { + key: "SENT_EMBED" + value: "finaloutput" + } + + } + ] +} \ No newline at end of file diff --git a/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/examples/ensemble_hf/postprocess/1/model.py b/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/examples/ensemble_hf/postprocess/1/model.py new file mode 100644 index 0000000000..5373e90dbb --- /dev/null +++ b/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/examples/ensemble_hf/postprocess/1/model.py @@ -0,0 +1,78 @@ +import json +import logging +import numpy as np +import subprocess +import sys +import os + 
+import triton_python_backend_utils as pb_utils + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +class TritonPythonModel: + """Triton Python-backend postprocessing step: mean-pools the token + embeddings under the attention mask and L2-normalizes the result. + """ + + def __mean_pooling(self, token_embeddings, attention_mask): + logger.info("token_embeddings: {}".format(token_embeddings)) + logger.info("attention_mask: {}".format(attention_mask)) + + input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float() + return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9) + + def initialize(self, args): + self.model_dir = args['model_repository'] + subprocess.check_call([sys.executable, "-m", "pip", "install", '-r', f'{self.model_dir}/requirements.txt']) + global torch + import torch + + self.device_id = args['model_instance_device_id'] + self.model_config = model_config = json.loads(args['model_config']) + self.device = torch.device(f'cuda:{self.device_id}') if torch.cuda.is_available() else torch.device('cpu') + + output0_config = pb_utils.get_output_config_by_name( + model_config, "SENT_EMBED") + + self.output0_dtype = pb_utils.triton_string_to_numpy( + output0_config["data_type"]) + + def execute(self, requests): + + responses = [] + + for request in requests: + tok_embeds = pb_utils.get_input_tensor_by_name(request, "TOKEN_EMBEDS_POST") + attn_mask = pb_utils.get_input_tensor_by_name(request, "ATTENTION_POST") + + tok_embeds = tok_embeds.as_numpy() + + logger.info("tok_embeds: {}".format(tok_embeds)) + logger.info("tok_embeds shape: {}".format(tok_embeds.shape)) + + tok_embeds = torch.tensor(tok_embeds,device=self.device) + + logger.info("tok_embeds_tensor: {}".format(tok_embeds)) + + attn_mask = attn_mask.as_numpy() + + logger.info("attn_mask: {}".format(attn_mask)) + logger.info("attn_mask shape: {}".format(attn_mask.shape)) + + attn_mask = 
torch.tensor(attn_mask,device=self.device) + + logger.info("attn_mask_tensor: {}".format(attn_mask)) + + sentence_embeddings = self.__mean_pooling(tok_embeds, attn_mask) + sentence_embeddings = torch.nn.functional.normalize(sentence_embeddings, p=2, dim=1) + + out_0 = np.array(sentence_embeddings.cpu(),dtype=self.output0_dtype) + logger.info("out_0: {}".format(out_0)) + + out_tensor_0 = pb_utils.Tensor("SENT_EMBED", out_0) + logger.info("out_tensor_0: {}".format(out_tensor_0)) + + responses.append(pb_utils.InferenceResponse([out_tensor_0])) + + return responses \ No newline at end of file diff --git a/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/examples/ensemble_hf/postprocess/config.pbtxt b/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/examples/ensemble_hf/postprocess/config.pbtxt new file mode 100644 index 0000000000..de573d924d --- /dev/null +++ b/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/examples/ensemble_hf/postprocess/config.pbtxt @@ -0,0 +1,26 @@ +name: "postprocess" +backend: "python" +max_batch_size: 16 + +input [ + { + name: "TOKEN_EMBEDS_POST" + data_type: TYPE_FP32 + dims: [128, 384] + + }, + { + name: "ATTENTION_POST" + data_type: TYPE_INT32 + dims: [128] + } +] +output [ + { + name: "SENT_EMBED" + data_type: TYPE_FP32 + dims: [ 384 ] + } +] + +instance_group [{ kind: KIND_GPU }] \ No newline at end of file diff --git a/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/examples/ensemble_hf/postprocess/requirements.txt b/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/examples/ensemble_hf/postprocess/requirements.txt new file mode 100644 index 0000000000..08ed5eeb4b --- /dev/null +++ b/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/examples/ensemble_hf/postprocess/requirements.txt @@ -0,0 +1 @@ +torch \ No newline at end of file diff --git 
a/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/examples/ensemble_hf/preprocess/1/model.py b/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/examples/ensemble_hf/preprocess/1/model.py new file mode 100644 index 0000000000..47b1f1befc --- /dev/null +++ b/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/examples/ensemble_hf/preprocess/1/model.py @@ -0,0 +1,74 @@ +import json +import logging +import numpy as np +import subprocess +import sys + +import triton_python_backend_utils as pb_utils + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +class TritonPythonModel: + """Triton Python-backend preprocessing step: tokenizes the raw input + sentences into fixed-length token ids (OUTPUT0) and attention masks (OUTPUT1). + """ + + def initialize(self, args): + self.model_dir = args['model_repository'] + subprocess.check_call([sys.executable, "-m", "pip", "install", '-r', f'{self.model_dir}/requirements.txt']) + global transformers + import transformers + + self.tokenizer = transformers.AutoTokenizer.from_pretrained('sentence-transformers/all-MiniLM-L6-v2') + self.model_config = model_config = json.loads(args['model_config']) + + output0_config = pb_utils.get_output_config_by_name( + model_config, "OUTPUT0") + output1_config = pb_utils.get_output_config_by_name( + model_config, "OUTPUT1") + + self.output0_dtype = pb_utils.triton_string_to_numpy( + output0_config['data_type']) + self.output1_dtype = pb_utils.triton_string_to_numpy( + output1_config['data_type']) + + def execute(self, requests): + + # Build one InferenceResponse per request, appended in the order the requests are iterated. + + responses = [] + for request in requests: + logger.info("Request: {}".format(request)) + + in_0 = pb_utils.get_input_tensor_by_name(request, "INPUT0") + in_0 = in_0.as_numpy() + + logger.info("in_0: {}".format(in_0)) + + tok_batch = [] + + for i in range(in_0.shape[0]): + decoded_object = in_0[i,0].decode() + + logger.info("decoded_object: {}".format(decoded_object)) 
+ + tok_batch.append(decoded_object) + + logger.info("tok_batch: {}".format(tok_batch)) + # Pad and truncate every sentence to exactly 128 tokens so each row has the same fixed length. + tok_sent = self.tokenizer(tok_batch, + padding='max_length', + max_length=128, + truncation=True, + ) + + logger.info("Tokens: {}".format(tok_sent)) + + out_0 = np.array(tok_sent['input_ids'],dtype=self.output0_dtype) + out_1 = np.array(tok_sent['attention_mask'],dtype=self.output1_dtype) + out_tensor_0 = pb_utils.Tensor("OUTPUT0", out_0) + out_tensor_1 = pb_utils.Tensor("OUTPUT1", out_1) + + responses.append(pb_utils.InferenceResponse([out_tensor_0,out_tensor_1])) + return responses diff --git a/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/examples/ensemble_hf/preprocess/config.pbtxt b/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/examples/ensemble_hf/preprocess/config.pbtxt new file mode 100644 index 0000000000..c3f70b03dd --- /dev/null +++ b/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/examples/ensemble_hf/preprocess/config.pbtxt @@ -0,0 +1,28 @@ +name: "preprocess" +backend: "python" +max_batch_size: 16 + +input [ + { + name: "INPUT0" + data_type: TYPE_STRING + dims: [ 1 ] + + } +] +output [ + { + name: "OUTPUT0" + data_type: TYPE_INT32 + dims: [ 128 ] + }, + + { + name: "OUTPUT1" + data_type: TYPE_INT32 + dims: [ 128 ] + } + +] + +instance_group [{ kind: KIND_CPU }] \ No newline at end of file diff --git a/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/examples/ensemble_hf/preprocess/requirements.txt b/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/examples/ensemble_hf/preprocess/requirements.txt new file mode 100644 index 0000000000..747b7aa97a --- /dev/null +++ b/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/examples/ensemble_hf/preprocess/requirements.txt @@ -0,0 +1 @@ +transformers \ No newline at end of file diff --git 
a/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/examples/triton_sentence_embeddings.ipynb b/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/examples/triton_sentence_embeddings.ipynb new file mode 100644 index 0000000000..5fef5c4405 --- /dev/null +++ b/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/examples/triton_sentence_embeddings.ipynb @@ -0,0 +1,1015 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "ce5723a5", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# Prerequisites\n", + "\n", + "Install the necessary Python modules to use and interact with [NVIDIA Triton Inference Server](https://github.com/triton-inference-server/server/)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5995424", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "! pip install torch==1.10.0 sagemaker transformers==4.9.1 tritonclient[all]" + ] + }, + { + "cell_type": "markdown", + "id": "4d9d12fa", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# Part 1 - Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3aef44c4", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "import argparse\n", + "import boto3\n", + "import copy\n", + "import datetime\n", + "import json\n", + "import numpy as np\n", + "import os\n", + "import pandas as pd\n", + "import pprint\n", + "import re\n", + "import sagemaker\n", + "import sys\n", + "import time\n", + "from time import gmtime, strftime\n", + "import tritonclient.http as http_client" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fe9a1636", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "session = boto3.Session()\n", + "role = sagemaker.get_execution_role()\n", + "\n", + "sm_client = 
session.client(\"sagemaker\")\n", + "sagemaker_session = sagemaker.Session(boto_session=session)\n", + "sm_runtime_client = boto3.client(\"sagemaker-runtime\")\n", + "\n", + "region = boto3.Session().region_name" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a4996fe6", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "account_id_map = {\n", + " \"us-east-1\": \"785573368785\",\n", + " \"us-east-2\": \"007439368137\",\n", + " \"us-west-1\": \"710691900526\",\n", + " \"us-west-2\": \"301217895009\",\n", + " \"eu-west-1\": \"802834080501\",\n", + " \"eu-west-2\": \"205493899709\",\n", + " \"eu-west-3\": \"254080097072\",\n", + " \"eu-north-1\": \"601324751636\",\n", + " \"eu-south-1\": \"966458181534\",\n", + " \"eu-central-1\": \"746233611703\",\n", + " \"ap-east-1\": \"110948597952\",\n", + " \"ap-south-1\": \"763008648453\",\n", + " \"ap-northeast-1\": \"941853720454\",\n", + " \"ap-northeast-2\": \"151534178276\",\n", + " \"ap-southeast-1\": \"324986816169\",\n", + " \"ap-southeast-2\": \"355873309152\",\n", + " \"cn-northwest-1\": \"474822919863\",\n", + " \"cn-north-1\": \"472730292857\",\n", + " \"sa-east-1\": \"756306329178\",\n", + " \"ca-central-1\": \"464438896020\",\n", + " \"me-south-1\": \"836785723513\",\n", + " \"af-south-1\": \"774647643957\",\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "d31659f5", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "***" + ] + }, + { + "cell_type": "markdown", + "id": "14a0ba73", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# Part 2 - Generate TensorRT Model\n", + "\n", + "In the following cells, we are using [HuggingFace Auto Classes](https://huggingface.co/docs/transformers/model_doc/auto) to load a pre-trained model from the [HuggingFace Model Hub](https://huggingface.co/models). 
We then convert the model to the ONNX format, and compile it using NVIDIA TensorRT - namely its command-line wrapper tool, `trtexec` -, using the scripts provided in the official AWS Sample for [SageMaker Triton](https://github.com/aws/amazon-sagemaker-examples/tree/main/sagemaker-triton).\n", + "\n", + "NVIDIA TensorRT is an SDK that facilitates high-performance machine learning inference. You can use it to create `engines` from models that have already been trained, \n", + "optimizing for a selected GPU architecture. Triton natively supports the TensorRT runtime, which enables you to easily deploy a TensorRT engine and pair it with the rich features that Triton provides.\n", + "\n", + "### Parameters:\n", + "\n", + "* `model_name`: Model identifier from the Hugging Face model hub library" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5e3f1883", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "model_id = \"sentence-transformers/all-MiniLM-L6-v2\"" + ] + }, + { + "cell_type": "markdown", + "id": "436d115f", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "## Option 1 - TensorRT Model with Amazon SageMaker Studio" + ] + }, + { + "cell_type": "markdown", + "id": "629bfade-8f14-44b6-9be7-88a2fdb84ba9", + "metadata": {}, + "source": [ + "> **WARNING**: The next cell will only work if you have first created a custom Studio image, described in Step 2 of this repository's README. Change the `RUNNING_IN_STUDIO` to `True` if this is the case." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6df2bf7d-a595-46b1-8d1a-43ab946bc858", + "metadata": {}, + "outputs": [], + "source": [ + "RUNNING_IN_STUDIO = False" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7c0eb376", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "if RUNNING_IN_STUDIO:\n", + " !/bin/bash ./workspace/generate_model_trt.sh $model_id && rm -rf ensemble_hf/bert-trt/1 && mkdir -p ensemble_hf/bert-trt/1 && cp ./model.plan ensemble_hf/bert-trt/1/model.plan && rm -rf ./model.plan ./conversion_bs16_dy.txt ./model.onnx" + ] + }, + { + "cell_type": "markdown", + "id": "b34cf8a7", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "## Option 2 - TensorRT Model with SageMaker Notebook Instances \n", + "\n", + "To make sure we use a TensorRT version and dependencies that are compatible with the ones in our Triton container, we compile the model using the corresponding version of NVIDIA's PyTorch container image.\n", + "\n", + "If you take a look at the python files within the `workspace` folder, you will see that we first convert the model into ONNX format, specifying dynamic axis indexes so that inputs with a different batch size and sequence length can be passed to the model. TensorRT will treat other input dimensions as fixed, and optimize for those.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4f1e86f3", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "! 
docker run --gpus=all --rm -it -v `pwd`/workspace:/workspace nvcr.io/nvidia/pytorch:21.08-py3 /bin/bash generate_model_trt.sh $model_id" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dd0bf459", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "! rm -rf ensemble_hf/bert-trt && mkdir -p ensemble_hf/bert-trt/1 && cp workspace/model.plan ensemble_hf/bert-trt/1/model.plan && rm -rf workspace/model.onnx workspace/core*" + ] + }, + { + "cell_type": "markdown", + "id": "fe095659", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "Explore the output logs of the compilation process; at the very end, we get a section headlined \"=== Performance summary ===\" which gives us a series of metrics on the obtained engine's performance (latency, throughput, etc...). " + ] + }, + { + "cell_type": "markdown", + "id": "8455c0ca", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# Part 3 - Run Local Triton Inference Server" + ] + }, + { + "cell_type": "markdown", + "id": "ed0cd1c9-c8ab-4f75-a368-a719dd165c04", + "metadata": {}, + "source": [ + "> **WARNING**: The cells under part 3 will only work if run within a SageMaker Notebook Instance!\n" + ] + }, + { + "cell_type": "markdown", + "id": "9c22df98", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "\n", + "\n", + "The following cells run the Triton Inference Server container in the background and load all the models within the folder `/ensemble_hf`. The docker won't fail if one or more of the model fails because of `--exit-on-error=false`, which is useful for iterative code and model repository building. Remove `-d` to see the logs." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "35cac085", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "!sudo docker system prune -f" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d6965857", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "!docker run --gpus=all -d --shm-size=4G --rm -p8000:8000 -p8001:8001 -p8002:8002 -v$(pwd)/ensemble_hf:/model_repository nvcr.io/nvidia/tritonserver:21.08-py3 tritonserver --model-repository=/model_repository --exit-on-error=false --strict-model-config=false\n", + "time.sleep(20)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b9950b9c", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "CONTAINER_ID=!docker container ls -q\n", + "FIRST_CONTAINER_ID = CONTAINER_ID[0]" + ] + }, + { + "cell_type": "markdown", + "id": "3f903432-6e87-449c-84ea-4c3ed2fa445b", + "metadata": {}, + "source": [ + "Uncomment the next cell and run it to view the container logs and understand Triton model loading." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9ca9f7dc", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "# !docker logs $FIRST_CONTAINER_ID -f" + ] + }, + { + "cell_type": "markdown", + "id": "f5946837", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "## Test TensorRT model by invoking the local Triton Server" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7b5775f1", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "# Start a local Triton client\n", + "try:\n", + " triton_client = http_client.InferenceServerClient(url=\"localhost:8000\", verbose=True)\n", + "except Exception as e:\n", + " print(\"context creation failed: \" + str(e))\n", + " sys.exit()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "36b46556", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "# Create inputs to send to Triton\n", + "model_name = \"ensemble\"\n", + "\n", + "text_inputs = [\"Sentence 1\", \"Sentence 2\"]\n", + "\n", + "# Text is passed to Trtion as BYTES\n", + "inputs = []\n", + "inputs.append(http_client.InferInput(\"INPUT0\", [len(text_inputs), 1], \"BYTES\"))\n", + "\n", + "# We need to structure batch inputs as such\n", + "batch_request = [[text_inputs[i]] for i in range(len(text_inputs))]\n", + "input0_real = np.array(batch_request, dtype=np.object_)\n", + "\n", + "inputs[0].set_data_from_numpy(input0_real, binary_data=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b47da0cb", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], 
+ "source": [ + "outputs = []\n", + "\n", + "outputs.append(http_client.InferRequestedOutput(\"finaloutput\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2588e261", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "results = triton_client.infer(model_name=model_name, inputs=inputs, outputs=outputs)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a405f8ee", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "outputs_data = results.as_numpy(\"finaloutput\")\n", + "\n", + "for idx, output in enumerate(outputs_data):\n", + " print(text_inputs[idx])\n", + " print(output)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1d88a95c", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "# Use this to stop the container that was started in detached mode\n", + "!docker kill $FIRST_CONTAINER_ID" + ] + }, + { + "cell_type": "markdown", + "id": "dc75efff", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "***" + ] + }, + { + "cell_type": "markdown", + "id": "ef0e365f", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# Part 4 - Deploy Triton to SageMaker Real-Time Endpoint" + ] + }, + { + "cell_type": "markdown", + "id": "706db9cb", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "## Deploy with SageMaker Triton container" + ] + }, + { + "cell_type": "markdown", + "id": "bdceed91-9fbc-4ea3-ab9e-0e2599ba7281", + "metadata": {}, + "source": [ + "First we get the URI for the Sagemaker Triton container image that matches the one we used for TensorRT model compilation." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e0dd85b6", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "if region not in account_id_map.keys():\n", + " raise ValueError(\"UNSUPPORTED REGION\")\n", + "\n", + "base = \"amazonaws.com.cn\" if region.startswith(\"cn-\") else \"amazonaws.com\"\n", + "\n", + "triton_image_uri = \"{account_id}.dkr.ecr.{region}.{base}/sagemaker-tritonserver:21.08-py3\".format(\n", + " account_id=account_id_map[region], region=region, base=base\n", + ")\n", + "\n", + "triton_image_uri" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "081d1204", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "print(sagemaker_session.default_bucket())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0adf2f45", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "ensemble_prefix = \"mme_gpu_tests/ensemble-singlemodel\"\n", + "!tar -C ensemble_hf/ -czf ensemble-sentencetrans.tar.gz .\n", + "model_uri_tf = sagemaker_session.upload_data(\n", + " path=\"ensemble-sentencetrans.tar.gz\", key_prefix=ensemble_prefix\n", + ")\n", + "\n", + "print(\"S3 model uri: {}\".format(model_uri_tf))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "da4eb8ef", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "# Important: define which one of the models loaded by Triton is the default to be served by SM\n", + "# That is, SAGEMAKER_TRITON_DEFAULT_MODEL_NAME\n", + "container_model = {\n", + " \"Image\": triton_image_uri,\n", + " \"ModelDataUrl\": 
model_uri_tf,\n", + " \"Mode\": \"SingleModel\",\n", + " \"Environment\": {\"SAGEMAKER_TRITON_DEFAULT_MODEL_NAME\": \"ensemble\"},\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "16b0ce98-b921-476d-8921-5e3ffb5cc7d4", + "metadata": {}, + "source": [ + "Register the model with Sagemaker." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e15ae6ac", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "sm_model_name = \"triton-sentence-ensemble\" + time.strftime(\"%Y-%m-%d-%H-%M-%S\", time.gmtime())\n", + "\n", + "create_model_response = sm_client.create_model(\n", + " ModelName=sm_model_name, ExecutionRoleArn=role, PrimaryContainer=container_model\n", + ")\n", + "\n", + "print(\"Model Arn: \" + create_model_response[\"ModelArn\"])" + ] + }, + { + "cell_type": "markdown", + "id": "695e71ab-c842-4001-8674-0d5c67ffd79f", + "metadata": {}, + "source": [ + "Create an endpoint configuration." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c6096778", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "endpoint_config_name = \"triton-sentence-ensemble\" + time.strftime(\n", + " \"%Y-%m-%d-%H-%M-%S\", time.gmtime()\n", + ")\n", + "\n", + "create_endpoint_config_response = sm_client.create_endpoint_config(\n", + " EndpointConfigName=endpoint_config_name,\n", + " ProductionVariants=[\n", + " {\n", + " \"InstanceType\": \"ml.g4dn.xlarge\",\n", + " \"InitialVariantWeight\": 1,\n", + " \"InitialInstanceCount\": 1,\n", + " \"ModelName\": sm_model_name,\n", + " \"VariantName\": \"AllTraffic\",\n", + " }\n", + " ],\n", + ")\n", + "\n", + "print(\"Endpoint Config Arn: \" + create_endpoint_config_response[\"EndpointConfigArn\"])" + ] + }, + { + "cell_type": "markdown", + "id": "c4893f46-dd98-4e60-9490-36026f128446", + "metadata": {}, + "source": [ + "Deploy the endpoint." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e9932ae0", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "endpoint_name = \"triton-sentence-ensemble\" + time.strftime(\"%Y-%m-%d-%H-%M-%S\", time.gmtime())\n", + "\n", + "create_endpoint_response = sm_client.create_endpoint(\n", + " EndpointName=endpoint_name, EndpointConfigName=endpoint_config_name\n", + ")\n", + "\n", + "print(\"Endpoint Arn: \" + create_endpoint_response[\"EndpointArn\"])" + ] + }, + { + "cell_type": "markdown", + "id": "a3ec6a48-5fc5-4495-9b1d-10f94ef95277", + "metadata": {}, + "source": [ + "Wait for the endpoint to be up and running." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f257b3e7", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "resp = sm_client.describe_endpoint(EndpointName=endpoint_name)\n", + "status = resp[\"EndpointStatus\"]\n", + "print(\"Status: \" + status)\n", + "\n", + "while status == \"Creating\":\n", + " time.sleep(60)\n", + " resp = sm_client.describe_endpoint(EndpointName=endpoint_name)\n", + " status = resp[\"EndpointStatus\"]\n", + " print(\"Status: \" + status)\n", + "\n", + "print(\"Arn: \" + resp[\"EndpointArn\"])\n", + "print(\"Status: \" + status)" + ] + }, + { + "cell_type": "markdown", + "id": "7aee2ad4", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "***" + ] + }, + { + "cell_type": "markdown", + "id": "59448028", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "## Test the SageMaker Triton Endpoint" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d55e6ea4", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "text_inputs = [\"Sentence 1\", \"Sentence 2\"]\n", + "\n", + "inputs = []\n", + "inputs.append(http_client.InferInput(\"INPUT0\", [len(text_inputs), 1], \"BYTES\"))\n", + "\n", + "batch_request = [[text_inputs[i]] for i in range(len(text_inputs))]\n", + "\n", + "input0_real = np.array(batch_request, dtype=np.object_)\n", + "\n", + "inputs[0].set_data_from_numpy(input0_real, binary_data=False)\n", + "\n", + "len(input0_real)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71acf686", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "outputs = []\n", + "\n", + 
"outputs.append(http_client.InferRequestedOutput(\"finaloutput\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8cc9a386", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "request_body, header_length = http_client.InferenceServerClient.generate_request_body(\n", + " inputs, outputs=outputs\n", + ")\n", + "\n", + "print(request_body)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2782361a", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "response = sm_runtime_client.invoke_endpoint(\n", + " EndpointName=endpoint_name,\n", + " ContentType=\"application/vnd.sagemaker-triton.binary+json;json-header-size={}\".format(\n", + " header_length\n", + " ),\n", + " Body=request_body,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "65724cd1", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "## json.loads fails\n", + "# a = json.loads(response[\"Body\"].read().decode(\"utf8\"))\n", + "\n", + "header_length_prefix = \"application/vnd.sagemaker-triton.binary+json;json-header-size=\"\n", + "header_length_str = response[\"ContentType\"][len(header_length_prefix) :]\n", + "\n", + "# Read response body\n", + "result = http_client.InferenceServerClient.parse_response_body(\n", + " response[\"Body\"].read(), header_length=int(header_length_str)\n", + ")\n", + "\n", + "outputs_data = result.as_numpy(\"finaloutput\")\n", + "\n", + "for idx, output in enumerate(outputs_data):\n", + " print(text_inputs[idx])\n", + " print(output)" + ] + } + ], + "metadata": { + "instance_type": "ml.g4dn.xlarge", + "kernelspec": { + "display_name": "conda_pytorch_p38", + "language": "python", + 
"name": "conda_pytorch_p38" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/examples/workspace/generate_model_trt.sh b/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/examples/workspace/generate_model_trt.sh new file mode 100755 index 0000000000..8c68d40c60 --- /dev/null +++ b/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/examples/workspace/generate_model_trt.sh @@ -0,0 +1,17 @@ +#!/bin/bash +# Usage: generate_model_trt.sh MODEL_NAME +# Exports MODEL_NAME to model.onnx via onnx_exporter.py, then builds model.plan with trtexec. +set -euo pipefail # stop on the first failure, including trtexec failing behind the tee pipe +MODEL_NAME=${1:?usage: generate_model_trt.sh MODEL_NAME} +python -m pip install transformers==4.9.1 +python onnx_exporter.py --model "$MODEL_NAME" + +trtexec \ + --onnx=model.onnx \ + --saveEngine=model.plan \ + --minShapes=token_ids:1x128,attn_mask:1x128 \ + --optShapes=token_ids:16x128,attn_mask:16x128 \ + --maxShapes=token_ids:32x128,attn_mask:32x128 \ + --verbose \ + --workspace=14000 \ +| tee conversion.txt \ No newline at end of file diff --git a/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/examples/workspace/onnx_exporter.py b/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/examples/workspace/onnx_exporter.py new file mode 100644 index 0000000000..b7e0907e52 --- /dev/null +++ b/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/examples/workspace/onnx_exporter.py @@ -0,0 +1,30 @@ +import torch +from transformers import AutoModel +import argparse +import os + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--save", default="model.onnx") + parser.add_argument("--model", required=True) + + args = parser.parse_args() + + model = AutoModel.from_pretrained(args.model, torchscript=True) + + bs = 1 + seq_len = 128 + 
dummy_inputs = (torch.randint(1000, (bs, seq_len),dtype=torch.int), torch.zeros(bs, seq_len, dtype=torch.int)) + + torch.onnx.export( + model, + dummy_inputs, + args.save, + export_params=True, + opset_version=10, + input_names=["token_ids", "attn_mask"], + output_names=["output"], + dynamic_axes={"token_ids": [0, 1], "attn_mask": [0, 1], "output": [0]}, + ) + + print("Saved {}".format(args.save)) diff --git a/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/images/triton-ensemble.png b/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/images/triton-ensemble.png new file mode 100644 index 0000000000000000000000000000000000000000..32f5c1a6aa70a24ff13d85e545db2c6703d5a997 GIT binary patch literal 32690 zcmdqJWmuG5`#w4}NP~1r2#O#B(jC$%Ee+Bk-7TFW-6h>fw=jgn&@~_+-8FRX+voSZ z??1lmulv{^fLX_}?sczOSFYc%}kykb!0#d-znNG>0Q2=R#CNcl8C>H^=*&-@=TYrzHH?9XU?2J z+^3}}+D$S9@ZXo^xFbRKcswOU6`u1G;1NdknK;&H0|Tf3KMpY4ZkXecHc^}~Io5PD z`@kfvqc*|LxQT}3VQa30DYgh;)j?TNg@fZFdajGp@U;hwK4gAPy-w8T)jNztG$iZv z$t$=8NcN3z4ly2KNG37_^62(_sbXpU_|0tAfPvPo7YB$P8l*du{D3*#h$Nrbu_|?e zQHT=B_4^uE@qxXIdSGn+8b!j|Qw)!`Lci*LVQfdB$ldn(on@A9gi}nLV#82pvmR_B zde1UJO}}!UFLbQss@}gWPOe!hFnXWcVavZI`bIajY-2(LF*||x{0;5PXM7mp5sDeT zk{MT{YQ;)f>U~xdjeqv&ThmZqZ;Nx$zV^Kr>L+`>N$oDstCOoeMe|ryiZ7qQ=XY@w zQ6|e5q-|Qp;%+A0@U){j!_5?(H^@V?GSHn?T}sFlS2hxHb$WBXl@g+AvA9HT{Zwn` z1N~y&(XT?p+{nA$>MQUzd3e6nKRzIJ-sa!qvxOno+MyA{v7N^LTv$IWFIca4a_ZOc zXXVkW@R5`uuvx7ie_&FgT8B%IeXtTeXkyH9xIfGC+{83N7Ibd9?{>Y&IX&hCq1&a^ zGzx6{^W9}RBYyv88#G_lYtx)PX3^;drM(^ogmaoAVMox7zCp@S zYXWs_D#P^?&{xWB(J4d$*Z&g4Wh)PF&X0DMhjs?jdf?1) zS^c)qYw~}bMT+9}A9;<|^t%abs&g)&I?nNTB*)q4HEs!1laGW1)1>eQ3aH9^oqz2r zqdx|ZM}ZLHr$;)*^CIIhe^2-ZpKXf7$l=Y}f$8V(sU*Br z!{57{vyYKLzdm(8vtQdKJlf*rwOM=6)5=}u{b3s*arGtmLwVzqprbK(Fvt z>4{|9Orbetm`4mdM-fk;P^nDmW==)mcy+#WT62V~_ayi2d=e_;Ybf_0L80wa#IYX| zzKj>mY5iOWf|p*d8xY|to4LFhL9O9n<^YSUS8-lpi7B=;KL=LRQ_WV}K%|AS4aUZ?W@pF7966b5O}nI@66EBqT)M3AVYU9eObBxe%l3{lY(I~ 
zpiKRzJ!H|3i%y8&#ixn9si<5IT6GlC|4kGtAc1T5MT4z0_dqHl`{?2s;m_Z8%g)?! z7@tJ3UiRNT`)X={RByU*#w`{5p$)g%qMJhun|9hqT_dR@zkuJzCk}rT-)jPO%x_LN z*{Z%{u+W87a3u%BWXs*0#+xE*E-rJMCDT7e7S}n=+#3Y~3H-0I-+h^X z8%0jQtD$A*tkmWTEnM~pke~a7>-eU7ZKHJE8vf{MeXv($ecS(*)G}8mqjMwUoqZ2r zw@|C`9=qO|R1N*<$gG?8I0GgS+R$O5e}Q|d-CUlA^*t=R%t+DQdeKKoz{%&W*jz7V z+@CsNfi;(U1W&@*OU;D2i&I3J$-D;dmrhw8^WD>fW|hr$FT<$>JjVxBt)W}TyFRU| z6a(b1B1MlaS~W>dFJ9w79H9D4LisqOy>92E+Kx3Lc3~3N*_Vcj-RnD{xIN$zKW8y6WEZ~&y1 zRI-u6e&<5)u%M<|>h zUy~rbCz=V*ePNZ_FJwI)Pk86aiv=GosyH6D8g|=Y*P|uwkS7(F#KnOH82kv)^#`xj zX=G#$w^-oRj!J8o@^Zk1$#jyYitR|dtrl_F)MA4co2yW#QL##?1pXkDmr~qvGIM;ha}Mf1a2drqeOq!-fzXw46>Ldx|Q0)P@CJP4p&Z`j%etr%GfA;8t(>1d!&oZ6`8$JatbTVoh84S8G;EqM=zHgjH;$FmIE4<$xe0x&E7(Mo2-hOacxht|x)H#ts%k5`J z!iHp)x1>CvS9(eHyT?!%-7vf)S26vt`;@K*FT4IaXY(dlPN8?yot<5t=(hN zx-wSXn^PN&4qZ8L>=CS6p343+W{C?!7!*?`L%v41Pd6*5Y$n#uCF)^k6?MCT>Mai} ziJTjxor1vkh9|z(9NFD=*WX^YcbPqjAN@X9cdok6QuL2Errr~Qo7w*eNag+d-(0{S zuXmho6@$l+8zJy`w2lF9|Ety%rb%izE^`3F2U78OC`LPcn^sUH$utRvLj3H2H&`b6 zQOLE~$f_m1wgTDR!E%@;yr?H;!fD~})g~yOqeIxYZFXZPG7ZT2qI7;>*&T7;;KxeD zsqG&`Y3$AKlc9poBbJLK2HfD|j^zjPo}}8PYW+Rc#7AlGn07LpTSw|9H1(LU$ix$7 zFE;A4(1cb)sX5?YJ&&~9-8=Zb<2F_y27>>0Oe_w;Sk7w~6&Aqrp2ZZKZyfDVz&4!B z+-4L@T{*uQc{)2O#4urTkI?``UWGd9MIT_Og=7IFXaRYRkw~=y=#EN`!r7#(1Q1J` zp!o_#!zoW>eza@XBUT`u?NPCv^FS>I+ez6lJHmP2V*ROFN*l8yYkC<4H@Br6nL`vx zX~Xdp#W=K+zuQ7W@~HeO5)OVx4j<}Q)4BGqfsemlrs0KOh@S>}K~Az1xvHMhe-q9G zkhvIeK8z7w`dI9|dN1&Qgf6Y;;XeZ~mZH`A^^`^bc+%msT$;|?UVn@8^dHTZrTmfy z?v|k~=&&?>A3z$*mGV^-R}DB@kL6ShlVP{56K|4#2>wvEVoLt?JAccO33`zoFsscEr?Lq%6ct9xBjg}!=J;+gy-gyIWt3x~(* zD3OgzBOJp_6u(=J3A_U;XP6w`+nBV9cg{e1LY-5?%?`2qIT%+ZxIVXtlS ziHnWn^dVGs*R*S%w<8yM#~4C-TNe-{#4Yz2kHomj1=B4N9U5HK)fecG(CAB&i!AIk ziqU~wX)IUJna`L$W%_?pvYRahZZRyvpKXf$pKdr~W{w=4g(&cP37%IuHkH?^S7ijC z@t7}tm1eUU1(Oy**vW^ox;B}VdI`hyUkVRx_Ap-!FMK9s=sy{yaLtRZLk2tFJQMUq zpEn_;aDDYBY+_c&_dY5j!&sa;?!!Sg>bBM$Uz#|P&+TV-s`&fkIG>$k%)hz56Kp4WG#%5kaO z@^S;g9y~!ake`_do&$@D+gvH+KOvDg@})X_eg^M{ZRza_p(=adLir?o6{XcL*aD)K 
z>qjEu9NMo0GT>}YzwV(b10+9|S3SBvb&N>uLzt>va)`wo?Y7HHnrq$)qd_jPL@Fc2}F*E~6ojVC(3p1GU1HQ@C&5Mil zsCIlc0ehpY=MF#TXRNTWn>9-ce2>hK<+AhmLA$9*Y{Ix8z1!fscJ|0VRjniP>SR^V zEw$)?$(Uv&KnoZ-6Kq>V_x{$iZYfiHfGoKhIEw`Ke%OVageRlV|4-Bur15Tv%Wyryi%z=fj=WL&5qWIQyL4Wb1M^brW4x2=BZ9=CbRk=*rrQ z`mFP}m^WGtHRw-u*GQ-cZx|xyZ*=`MV}mlTr6TJ<#M-?!u+YcMxMdE=RoW$8we|5` zdKML$R*6XR%X0IAvNK20vfdaf9_x`9n5n`}Sk2%YVw?w)%SH7u#w7#MiRvifRj={k zjS#URn;aXBDIT0P!Sl5=Ps`$hRdc1B7AtlHab6@8FGy?_FC=PetFoTNn#te-{=5B( zG}z}ShM7s9hoGm&+6I^_hju2I4wTlhO3dZkD|#I4iEfiJx_lqgGxgo$0?(^%!&ggHkB|(bU`Q!a! zXsOtbU1s7iFn83&DuB+4N10 zQ^lPPtf>38c$Zlx?Uh!JUV=pd#kl44!U0>?u=g|Eb&^RkK%E4r_u&w0k>fehx?$I*vUi;m(AXSxBEjPU%dUF+=T#`fWDh45vt&rfR$|PHOex z@-g2z6UhwrpFz&s`Nes@i7Wm0TUg^c@5bv>c|q>jgl^MZ| zZRhAv;XRqHhl?)q=VuB+$+2S`j_M(T-65n-I{6|alcvc9(O5#Ln!JV3lfFEl$qr~SE!We`7&eRO34Z+ITX9j68IBvx zK57@Ux9~O-mb<7~tYfS0u0ZblIEzg8x|n6?1?fkLuc=%0^IocacN2uto9{NoW$dHu zJV}INVN@LnMs^G^#TqV>!=m<_8202Qg(y4RnVBzDU%t@Vu=O26OQ>B5P0;zDq_*oD ziW}HO;nGY)KO|$`>%MZ!?)`Wt$M!}Nxc~iQPL5`1wPZpSuIeAPU8v|~_ubyx@`}yz z2j#6FkvBd`kZ1W4b<4g*EXRsET$$fCBFOH4_wpo8spav4onA*CGp?0hEp8#vK4KXy zRf0S)H6x6NbwXZ0MS6NeRN|;+=jk%f3BBEEsygNe>93F5=?(VuM!L#&{FccQ<6IqR zVrs2it<-4oOIou{1n(*f-7;k=RrgW9^Yz+RBcNi;Zn<;kVv65^86w9>Q45_jo}oKy z+dRY}VHN7yW(i+)>od{6Vt~-jp32S`w-)g$JO`@ewb%D8hYX0dafaoYw4nwJ#nhf7 zrG0w#47Ga6CC0xo#`XS8#8Jp55-;G>CR;_^q{Kt=IYRx-eUd(r&6-~?M_ZPo8su|?m%56Cxe17Ek14f!y0$o53V4{4|YA%i(y6!`= zY-AF=Yc#wK9bmDD8RX%m4RDCH6rmEBmN_evALV&V-z=~@DvH z-|6kkYF%@ii;vNhdw;jDtvAsxRH8cPEzQ6kUEWSBCA?LJ?_CfNk^3=!>*WBPqX7y|CRg^{u8P zUmIE|6aM#wJ$hGxSI@(Y7Lder|FU2^slE5~kI%Y)ir}2ymxvKVk!UV}wYrU; z4&Uz?{Q6q7ID0YkjSY-bdR~_C+*kR7xP+Pah3Vt6Bwf4=1HWOU=YDVEcgU3lCIsC` z7-{_Z5k4l!L`!-HnV-UgR4win(G~9<_K9P)%kSBwy=#^=MLakw$xwT(CU#I3mpW87 zl8Ul9GquW1=+Xe9rXY&vyw^x==`NLe_~P71bxG>%(cY_rE~n&sYdWaWQXpw>SFL%u z#4pWD!JYpKH|FH4%I{X!V_7-54-GbR_MaFc{_e}gK=O)?R>g zxwuHm=DMS*BlTfWUQ~gBAVTsd9RgV5eGq9OTrGK4rZ|2h=~PLsIuZ$NE$xjZLO@zb zA!~4xqDB|4L;n=$%Ri@1J^3B}2Sj76oz#@vY%_VW^PuB(pt*waP{eL)?Sn-Izy!dS 
znY9^c%}bc^Bycrb8NIAN4{?UV4P54Ggyc3G^V?f?lLzl&#!bYqFiTAoYlLTShskX<=-zFEFk@YlG)p=Vy7w!`cyM9$Trd+MF z&kf;YRaHD;7Y$|fB1*~#_c}EKA;2snFrfXc={J5l!(RcP{syL!-X;zX@t9OM1` zl@1)qW|;x0C9~T}?g;~8`qc3^m(cg-I5_@>g<*a71Avw%Tx4~9(l+TgsUi|BBeAP} zBUQv@?kJ_WC?S=)RR(F3k^rJ4H z&1}bvj0wIZ_L|#1{xTQz06Bbbzw^j66UNVxeQI zG}*vg5LQ+99|7gdA(`wj(hZhm8H`S0BDYkkRu_c5c`tuf^@Nf)P*&VvZ?|&uaVLK7 zy;W)dt!`kVag6tCp%Kj#5EI+fREDPIFEoqMRB06{S$Owk#e{PDCTWL7$dIE-Th<_4 z=QEZ#Znl4$Vw#xY@r+Opv z_vXt_foB8$X!9wjywq(SvNrA%CYx@ZRb09e|1qz)37EDZ_n&Eea`Fmk6{X1qeZaJG zJ;esnLMlb*#52|>dV+`{&3|uB+In?BktRPfK0uo>0SFXBE>WxmZ~S|r;Adob@>BG>Kx~F*dQE2l zxe~ttUaULk8sqZdaXF*@H~ZQ`iyiR5*X4naxW_hv--@K&DHGLS-Q4I}_!TJto&7f7s-wYuLOFUX-b+ z3UFKKtk0^oon0HnYXSL^<1gSoAMsl11(FFegV3qCEsR=es=!aVNpf+?OVVlRqB?A1 zLF)zkt(T|=-R9!iso&5$n@?0tecXOV9^agH;#XAg?}cTWs2}u5gpQCypU_Kg`CS(m zk)0KHt`(7Pt_P{Zoz>&~>O>HTR?+V|^ViLbbPF`J3V5J8tXr3FgzSa7BSB>>L*LI} zA1JOsgPF5CJa{SQwF^S7lG}V2JMI_+x~edfHiLZwwgG|*t5)hC@sj|8iR`wby0|n* zn3W8qNdM}L>R7_lZ|zRHc25#-)zcYorRjgM1f{oU&$bTfaIWDW5Y8(c z5*irtT?``QuqR5L1RK0AW5T>6{RnYH_FHidh(rJO5`>PCTeao;`x0r-T{}OYw;K;s zUT$#5+z0Iurn-{%w+X7#in=F3`}zUMM18}UN*`aOc~!{8|GIq-X#9Uw@ssFC=^_V$ zfRYWv726GvK>LUMk+DcU$a_~8N#znD>BXJ6&wNxv0<=>R0wIcokHJHh0(51CNYQt2@ zF+?)=nqG7SpW3}Rp5Y9*;{6||qf8PIPcPVr*KMRdS-JSqyCmRb=fOEA2}Hjc9Qkh~DhHKdEfRbJoJw zo1=*Bc<2p9W3tE^*h-7ZsigyuHkg>f=Df2Pu&vI-EU#nhXLt^!sdti{)}FP2LslRzeW z_LqzqBJTgDntu`e_!5)nXSAq)WFp&`?Yd+Z@zM5E%10|=UDK*502TKzOJl^pZyeEl&eAK*{a@E#pc_oFZ&+aXYko)YmvKL|r4zgx>jUZ}LLLhH<25^iB3H!g32MzUScf5AJT-;y# zGbMROuiCWTkkeHJDT_9sbU2ZI{LZmvwaB0tm#wr})&1QUq6#bieZe0FN-9a&qv_qG zkT(|vNs3Y-tdf^?$TQ6cE|tw=64`JM6VGTNv?w7+CU>oG-9Eam4+ zV<#U^-bEdLmLeUtiYZZ6X&?{{<-qV`|K;!I9>j^5t3a>N8+n(Oyn|SQ8_LUXu9eQU zCsfSnMA=7m@qrT`q;fz6Monq)zEM2wi|Et3bt4(#c?+mQF1PVJ$^8z8Hsoj-d;`JZ*w4Y>Ix&A1czGWslpf!KnEK9}TnDP05 zlgGC-bVN~|KBYkwvk6;^Nr`&%+>{BYlv*uYT@kq-T8_}0Gj<=x@443u^ZyhjN9CPx z8{F9|T1%4tPptjxHx0C=IV+C7?9O?4r9={FwhhQ#Kx+oOGBB%930BbqWU#`Z7-;zZ67>I8&=n6t89qnzZTvJsSd(-MpVcW;Zkr66sniF(3I- zwTJ)yF-Ve1-+I<}+^*8KNw@qMrNJqMQ&?<&R@aKl 
zS>YwkQ4J--J>i^sIP}H}-5K#>uO1b_*?ev-fp3=H?+RX5wRfyp_kIj#wQj~cJn0~2 zo_hLt-9%(|*GP0@nc;n(Ri^gXP?o0L_7!dY$MMA~ch>v6QEqh?<&6JCdX~>E=c(?I zpxW_o^e;4rEx*^Z(J+tx7PW#x;x|&FmvYBLkWCDso++OP2ay;Uu&1WO%_@06uf&hO zW<2I-E!ENh>?%L?wa>a6z~{kEzu>0`Fj_v3PZ4Y_yS3e$%H#n^JQ}ybZP%!AUcQ0Y zn?HG9x*j~K9@zLOjJ+mpn6^bq|voDonB^EA$lA5b1rO7pCQk=6CmRG0Vy8TsA!JACZAu)l6KhNe6&GHm0 zo!J0jNod>7zfBL*5Oc*N6VpUBY}>0#Y-L6*(LlqOr<&W}uw?Nn^$e(=0jUxJ9t(Zp zE*KxFBDI=mW>n^xA_8QR;`M0!jw7*H8rt9^dNOq#CK1HwJkpn^i+h`jw^gyBkmA0k zPjMP5!7|g#rL#AHBAe*!<+pGJ-#-Ch=d1ednOmW$Xv#RH(uBVR1C4=uis=Dlgwawz z(VjAW6uqs)3#91@!n+vDxY>hjNrwyNnRyTf=(*9eQ)d6w5gvew6%}Jyn0!P!nj6CH zl6D^cvBjn$-uWZE35#*Htq;=_OJ3bCti8Mx`(Xw^C8A~Ga43IQJ)3oTG5E|h;HP6{ zolQxEjffh+TH?$Xgn1d|-J07clK9|d2>&r?upd*W?zT3#bDQ$*kBk zWZ&(rdD-0A#IweHA5Y}`WA`?Efl$z#meXrcPqxbniaJIyvHhNeIK0ETRXW;!7CY4H zwSQ6q+<_BKZ=>1`{+MUl=Fw)k=nyHZ1#i3`6f%$Tn^Io^^a)^qIXp&YnW{sdF$L;s zH5?DVR9bBRTxFf(j*>Db{5I2jQGs{0hFZZYKI|Hx(Oz_sC&$NPf~k=KNWn+gVZ2xWdCO2;bVMig2s=$uz6=?5ESehzcAw7EP`8 z4a5@q1PvyMei9-o!^D7~j@-w&$C7&G{^f{KF^mTQT%}S1KR#@8mCHz+Y}Wzi?7+}& zTC{lJWRZ6Qh=~JPs_9L`m0IsmPvU8aC zlalM45`m6=+VYu@Sfz6Xmf@f@E>5F7`c5v90DMQUw^_^O|xQ?AoZcZeJ?9-4E7k$9jl z_}W@wpY)FPZv7mN9-l0RhWmoF2X3DxIbvR+U#B%PQGUXG-xLZ3*G!^t{yqcGh3 zbf$wruS106Hu5BQx+f~1yR&>z0BAzR2;%Hon6v?P<6lr^$U3j-p@uCiIqQyJ(57Eh zAB~a!TxQAay6C3k-=dr0E^Z}(cqw`IEYhQ`YmX!ajZdvp`wJk2VwmjU?+ZMjC~+== zYIZqx5ugIDdkkYOh$=V95w6X-hsgStT>y7Rz0)H6)VV-U3Vb!>lQbL?A&^?Phne1*Tuxa_oSHNzL|*SNjUmn?4mJ@T~CPv(T@dI*OV!*>K8 z4E5r4sJ(*tH)hT+hpWr22pU8^GaE}BYhyY*zJod)ZnBXcP#xSApj#O$? 
zXF?WW@h1Hotw5W*h%02}MfKn_w&U3(cjp-Zee>C;5xldDef5sSGMVPF;W(5ytPV@+`9I0O#cd^F*udlua3@S;oAMs}>cRQ@U+| z<_ZRogGSSuQ~f%P)UeAPwfN?XHoyjT6b>$?Mo_@Zow~}r zkEui8F9hgN6r>$CDL}0BCNE9>(ipd5^fgm|bde==xp66}bPw`s3}&~30O5a-l*VU!1yDM zH@J*OL7KuTeJeBS92;}WBQ&|#0ds4$xAA$OY2W@ODRo{D5h=p zI72$4&fMZ@lL`Do%$dH{G5N@fH0@vGz9Oub3*xvRDIlSgU*aWj|8RR;Z}+`rrE8H{ zpWzu_(u?pu_i+vis;LizI1v)lm)Bp;qb@A-3_Al4&JftWO-L%8*QolR@}ErrN$%sY z)^!^%9w&U;_AwcL>SESmC}es2nb%0>Ip6qLm(@M>36!N^g$DY)7TK(_VHy(F5FQ>C zO)i-)S6zn$mOfSbX(JhoicV@8<2p`H0CkZrE=Cl7~q^^9`4q$e+Als{@Fp z%fa~jV`pKnTG;l?rvOnbEWl#+P;#%oXve)$c2K4q_%?CJWs+u*icdJaxE<>3*%wGU zM9kgC;1z)9m3877Ik8^vHHVCfF|8%r%k-w?=Bfu*d@i7{9V#nZElt4qxl_6Paz`M(AB#kefFlK7`=?&8VY$; zFVz1PWgap2=1KU3D|b{6oxDrn1L4*rcT7%&k}Ur^Ha(rH*@uO|R2k$J0_#%-14F#^ zu5V=BZc}f!E8rblD$IeT_P2Hz$&EC6{a|fVEX-m$9M^_X46QEK8_A2b$96PZ=1~$l zDK?wGJkHZmY5j`Okr)kWWy;m|Orlp7Q9x93{?!OP0rk z-*#fe)53Gwk2sYil;;5c4V_V0kDWdEmhR%Poi`{Ew}o0SXC=k2r&e0&KnqB{zR#EW z9I=>UDf}At&+B9t)}$RoSfk`hwf~0Zg*fCuBA0yO!Y9t+9aqYEKUg@J2hCX z=*9lhHQV-bJjg)-3#vx!cRC&`_?^MwY1L&F5pl08s^Z+EA3N*GM64p$Ula60$szY< zaZeNv?f&8_N%D&i?aiDbnG*hAZ^To(z#Rx(5+Jj1*OzR?D@+e1nLJ$H9py2T@AKt= zijeBUkh_^LVK-&O-EPXOP_`hGi2)-t@IbTf^-FAD(*W6A?Q4nFp8yXg%XFdu3GW02 zOo}(*dq%k2r|k)z{;f7+jfnQ<4rGktDH@M~ztw7hM@+BZVv|X-MO%R~`&xHF9$&iMS@XtYMU!fYH&lR=27~puN=`yBFEN8#B6Kp6;6^q-J^GDV6=0>zWPov2zSaQnKyYQPb=Lg-LD>0$bv~&80*!ex*vbvIG=cz$B zRII@C17V{I*@~K%ekI(ShpaG4lB6CPB5B4FK>E0M2qX3XbyV18*@lc6@K5Dy-iLon zj>}cTKU*?3`{mLJQeV&Doq1J^>BNta>?Bbh>+V#Wr7VCJAJ`t$Uiv$1t|VgLtAs6q zdSjXRiMn`VPb#rd?TD zDKOg$kU^}HncFHFT1})-aG&nB-Ay2Low`%A@<(|&5C@HoIJRT|%t&ZM8W%SPZuCaD zO;oir1zyf(99m}RW`z+8t^6TeKK{BWC#6h(1`x!PhQ-?`QJPL_c9uq0;EPD?Be2i6 zEQ!fmhhRviVEtIT)aEV+3fORAy?W*Y;R<6axv0S}WrOrwjip{;ZR&x{gSbCR(mmAt ze$Rg{45YLC5iW3v-w6WB$|bf*r_f9yezv zPg;6hb|V~o)JQNksS)W=n}RE~HWxl)Pf6RM$hARKkfA9M^3uH` zlBEy)K@3Y-(WK5#^>o{M+B8`w-BY<_E;=kKNQiVZ>1E?P$3oWqF><3&Ch37HH^TGZ zJ0a{KvgHXY93tH82hB~uLl+9VgCAa8@dm2vj;8TuHv)A4!@_hxHc;Fcg9ZqSivn@d zXeGI^sWp$#q};xKQ#_DmQbOkz_|u6q1*O1s*2i+Q7g$t 
zY|L$5SIDs}ETn9|h;U3E>v)>AoFV>_xm@1@tW%nlVlLb z8#){4a5;L8J04ua`U}N7cN4qO_;XgV8S`W72Mf*x8X@yoh+R78sfCkorE`y+)GpnG zCqNwf%jeRp2cL)X>1yv|4l^4asau8O^!Y;ti$_Qil_>m4x{#OP*&`$#RN!v*v-|Dz zq~b5nAU+{nENbg3;|CEPTo#ib#%_a=KQby_hK>Gu)Rybbb`X_UB$XI=HK9 z|ukW1xus)Y#P}%i7;Q^DCE~7bNh)l`8`*==>!i>98%=LIYGs55U zH@Oz)hxY*2Ky~MT5LO_w_1TmwD=mE4E-pG9)BZJ#vG!5BlsKM|nR7d7feBj5qXOUl7cF96p?PGT5AuAhZTXbsw>pQWqhTl9BQmcX#qnGeo#X?HSUmj)v zdaW->d*POR*Dsq z0iZJzaai(VEF&#NVX3(%jI}>bVpFR)`0ky>mm*}@g><1uTIF*z%Ou_f8u$?VLJkPC z_7bv*Z0^hDzV_6NhEq2%_Q`=WPOO4HPt>_z4|V49$qHjz;ZHj{qlx%j@?G_4V9f=K8WTvEZnJYF1(?`RNzbn^%;KGVr_%hj_CS@UH;fV=iAch z|2U6QcSU-4D2%_h$JcSL>$1rAUnL#816x>^y4tf_6NaSzyBrvxz`SM{d2)K$p4&pe ziC(qEZiFx-$GOnOyzXJKCdgWNR4K)pj|b4n{-H+#c)O&mY>H(SYMf2!=^@78OFRWC zwu?{x{+!W_tP>xsSIiRGp5X(4`(IKWPzKTaq}&>2R93qsLn`LbkP?X1@u?J{o8{}x z&w%eq_u~P~p8$mIpH@@Jd?&5#D|MlcY7b%2-z1at{*}KB zfZ~_F28@VGzvy6Jgf0R}-|5-f!ssH^aGuru}T5hsXHU#Uf zwVJJupR(7tEK9LBtcK*+{_QMfAkYXuIQ5Nb=n?Ej=^nD&NK1{oF{Pg8moreLR(5=- zhhAiGb{9@}gvEAg%FDJ)Gb^gg{;wA~Y00fSuyPrv%>IZd9vIGc2=2B<64UJ53a80+ zRkE!reEAs0v{n3WGNwTC1@nL3nbgJ)`2230nk~88;D7@~zx)r&tgoKY{&&LvP<8%k z9sUn*=l^Z+I_tw(LzHgpnisDdT7b~$zPz_2D^UivQ5ic-unC9=djqx$!MA| zCm@plk4hu~l3r8!hti+~JQg7b^vpUk|D{{JX8KDSDFkNnCjJF;lM(>6%bx`N0p-wN zRB%Z4FYRI%&;T5>zbNM>!(X7)1z05PA1;Rzpms9q{JQ`ajl8^n;k%>+C|dZ!;iZkp!}%1m zlS>#5m5HJNW$b)nVq!{kO)g|$*ivp{naL-T()06kn;lpz9S`{1kb;Zf6>k<)bvhZqvHK#|-WKhj z+T2|p>XONlRJX~F8#|@|J#Lw*@&Y7bmx9h zd?hpvUc&^s1?W`yc7Zmp-~#o^)=0sVIw)jRuM@N{T(%y$k#gqbNz`8K!1 zW$?F1BA7g0=}xn;)Fetd(9THnP*wUD{`HmSE(bafN;DTpo=TTl&ix*0>rozJRwp77 z2Bv`)MxcIMiD;5E_&<9T44jv@Z=&GY&UTULin}}x+l8+zHrjb-;1j#h;gKt7y0tx6 z)9+%b`f+<&n46np)9+Tizp8t!-tSVjjjm5fp`X!(pU9~C(VPCeHh%bTaZyaXeG7)9 zG+4bAoSinVvyCVEy9o+jf!lHM^^1SE8iN6;)B4W-&Q7HHTse@`tA~SWxm+oWUl!!1 z4q!`-dtco<05B5?*B_N%%EuUJ#Q}6R>n)x{9O>`e1b`ka307{f|9!hh<9GchKbA`1 zF=KU218bCQp`u7JV>Y*$g!zdJF{v%)Q#JFklHAiW(&uS|por4`fPwP0-rk|vrlkPuV} 
zY(9m-;B-)mjv$v918M4{2q&6NQ3-XZAv)btj7J{SPSCZAks3R^4lRX%GVW}3B-*#PAU{W`7dmp`ClVWRE8*lEzfTfw@^$VPper=;1m?r%_Y(muS#Sh>P>g)-+pln0 z@}knx{X1u!6=;;kg+CwFx($}$+8>bgPsQc4Z_-+ujn}QdY3ZtKaNx}ShwnX+xSi-N-7qRYjmRjb;h zIq_TmvE~a|XVqYnW0gZES9?=41dhM%IMYQ~G}^x6EjiC>gNt*P%wj|$cf5ViayHSe zU>_;segFmk4N9vHP4OWE{nHeiek<*hzMM3!im9A8s%PSCq$v8P`~TD1S;fWCZhN|s zAi)|5t|7q*7Tkj+xaGs$-Q696h2ZY)!8H&X3GNWw8z<1XPLaL$IcH{W=4!YDeho!+ z@m9TS{hw!X#dIVK#j@_m31Zgdd2&xW^qXrXi|k{<2N^FN(%Uljwu{{P<+j@?As=zv zE3H8N5g49)_%68?IzRrYn`9zjbC#t7bI>fY6kk{3J-zH*mh}zmLUM`CAN!0AWs&y7 z`Xk$j#MSgWI7P~Zlgx*Gie zhb!F*+mn^H$890oGt*Zh>=#x3?!+`_A!{Q%l_YU_w0l!9DeldeW)I*hnA2jlml!Np;XTB#r{>{2w{P@*CO%M zDX7eXMCdAuB6tkomw@tv@NtK!xeWB4gwEBU&(S*@yL7rr%u01dnm;rCtv=Rzi0QvC z3{l|1NZ*y@53;@I_*J)O`VfpAn0}>w#%`1>aVPv0Yn_Aa&S0)e$06T#AT<;uq&iC_ z{WS8?P|2HT`A@9Mt=W9>k?k2p#;3e{#u6QK6$)xpG3zTLL6chG8~T=;rNEsw#0eAu zS{FL+Q(EB$zG{9ZU#Au!YQ}Tkq3*5^+&!YmPW@vPb z#~|HQcYtr+z1ALaj+{^Q8~j4M+)qN66&%Q(}KG!339aAE8lyTaI%Vy@__Uon!= z#2#?r3+Mh5$7;0-i*j4F85ZF&jQ9}qC+^n9-A|hO^mMiV2E+~CM_21S>d62KpJVuqpbo2qPE`q!3}6)hr?AU&gE#> zVat+jw^t(Lm%q>oD|hiKIu&|vniyn&WCJK#g7nR3i|vGKf`K2mgIhnMj5~K}+!ZLr zY}ey4-YTe4I=ni~MNNL|s%sNUKP_y(bC7|w%lAn!%t@=}0*IBjWME>X67OYnxml$q62!(Ho=;bBIcI7-Ik1jwP+n? 
zvj4gxLU|1VQb?$@i^~2nnQ0yj8EdmEa)2KTfaKsk9Q1rPLL@)hMn(uFKYdwaGN9> z3WAvX<^HCT*81R&7`|*UjK(j2^z;RbOPnqCi3pruzrXcLQgEg9m&|TTPyMnDX?);G zoMCmBFDo6Mh&6yAW^_h(y|$X9Mkk@TFDlhl$yV-{C*5dgG41H?&_04$GC-T#W+soQ zd3_dc;TT*Fmj&*_zmJn_le&fRc^V2s&8l~E(xO`-2&`LFPWjiUG9`1<#Vx}?v}Oj# zoA6>_fxOx7am#d%`N2CQl44& z5@D{QcUNY5VL6`1^)V@1wQY&9#Tw$4dR{|dE`{CfF}nG);rSU_o-dnHQj0wF<(pRQ zvBLYe=M!zV_(xay=bN2^?-9}LrZBu3*$!vFiEL9Db+k^}ug$+ROp zIw`tejW$$tr3AOFrWC8UIjGh(ZK zqr0T&8@I6H>!cEi8tqVINe8SMyD1+OO=!-d5DO?8-?hHk*%ZfQiae%_vK>5fqm)t7L=4e@n7qfD6dE|wuSa8lCh&r(} zGVDT-!bjH5shn&W98%1hW4szsn1E1DuZKTTLI63r$O2Hobn<*;D7F+?GfE0cLA7dg zT4`q%Eb63{g8FAG`5mvMJri zx&Yw}BJO&NaCt5Isw-w6u!>p(nvs=t>3XaSKrX}%Jrevb+E8@hUk_AVH|hnYX3H@> z)|X5no9ffCjroG}zPwPI7>V?FDZSw%i5u3PAs=1<&|Piyqtby|3MgQj=!E}kUj#*i zfN-2)HU@7psRkKwAopQYl&!U6Iqe<5gUyRz{mEMe$yuVj_942<43Q}R4LrEaM1+s3 zt8yTG7|)kqR3{sIrOb@6dJA603w^8*p$hLoDOt#@Z+`uXFQsev!Xf|r zR56jm4#@!Ver0TU!|nCS0CneF@(e6gMOkwO_4r>Npf~4~xwPR^YG$l8ROC6640p;6 z7hNAlNgA6Yq<^)^we&AnX&YOs^et6M5{pI2G{x?)vl00LR-gF-vH7`Bg>Gv4@Yu(h+TO|CKe~J-D7rXOy!hc*;L>%*?lv;POB&F!e7&r`(uJZ; z{ChtYK$sI~JD@iqM~XYI7qYkWNW5J06hkOxkdrWQ*Hsqd(Fp zE2pqI>17KL-#*WQk$gCb%YG zDMM)~eW+0SWp!4QVKkL@72;qq5BJxnUA0FUYTy1jN$cv+2obT~DPK9$KNq>kd=!8t zN~Q>8|A}M?X9h$=R@VXZVi{p!IFo&=5*DAYso-~$B`ECEhi`psE-RQ<9+CT#BncUS z`mn~|!ktH2#gpCn`*qW(l~+^hwVAd9OEMwp+&;Qu6h2;%&YW}s-(rK;9u*E8+#MlZ zyK>ZcK`lMvPvJecpV5iti?8kIkeoWW za%3S>4umU_YX~3lyl4wYxb&lvhjQNQcEUqtEs8;(>VJHjy!kXFlGjcF5?~bx+h|f2 z)Fv0d;a2AL%aKhmOYjJ<>RwdO9^y%6iLwX4mk6dioPz4>;O-}US(aYuXR?Q)aA5B4?%--NK2 zBe}o2C@)!|>%U~Ln)y{>N}oI8AH&{RtM3wD6GLMh0UHcdU7*sV?>uSkjKx{omDzdaFsfYx>q3Avx|9Gg%eB#w*HD+YR-Ybt<7QFj-JTPgVgTxqJ`n z8z0G&Nnx!!=8Bg}I)?z@mCC_Rh$KTqP{3vcBYzIF)|^1k%Wu7%o?`Bgow0z=7blo} zo)i}G%+vcQSYEqTiJU?@7L$n0LVZite*e{x&qaJ~3e%+Gm5wBP1t~p7!{eJ6S*7j;lH|c@4^RSQX$-iQRJoU*N-;{l(}O;-R>2itTC7%?4pB`)-ZVJ$Nz1e`5!`?}#_jeYdVNJGXK)J0$q@&}r? 
zJwigs_>f83r4cNWH33Sd)QOD29^SNVuf-_OUE&nnyIL*{D@*>--%%26;K)q}U(?$V z%fFBO|0%c?A_JY|jnsvqt5H1t#z= zAIXe9(@^_8PZbSiC9L+dxZc`4OBg02JjPSW(UFxPME z0re9}q&Iq@seHQvzOx|Oe^)M5Q$?7}Or%1?$in`?gRf;q#&@^{QoZ=e7$jXI=De_<{U%sEU$7rN28kKJohnx<7vp^F23a8)VB}aT`8;)=m`KCcL$ToD&f{d9EW~7MeiB=1FF(@TSgnPU$p!YFE zj&&sM8q~ib2S>Oe-tt)czn@PAa1biaJ+O44A&RaTNv9s$b>VuZfoNE-gkFSmEUsmf z4}jj)2_!^6h&c2nYG{*-3^tAU_igxN-gMR$3da6>1^ZXE87R+2^qK({N}v?-pUeOM zrQGcr;}|^GT34-m0(bz5rY5_D)lUmD^sA|q6w4i-uY z7`=&LNd2ExdVZ-F@U`!kFB{uJkCdL-vndxXnxnCgbNNooo^Na+pTY4}y&9~DskRZ1 zm2C|%j#_zSsk6`R{K5NE5XI;U$bH^Sc)0JIG3Qs!HI z8VXpG0p4=jhz6jxX^TR716k_SgiMv}0^=W)6;xeV{CoT#*Y`NzbU7xmy}} zW&oc>IdtK=;eQ%dgO#8-In z2E}PlsU9FMwKf5|?dBc9hkfga@Z-FGvXkx{Q!VlBs&4J?RY}c9whzmG#0F`LmUnv` z2VM*#IGj=Yf^jAhI?^uH)P(oKTZ=AQ%nVSJeM)mpS(D0Ro62H3o8(2D8OtvuC$76`Ftb>OkUBN7_Ivbv`RGF%+xG!ijy~F><4makWWfg15EB=MGul!A8#Y*?;suemZdnH%p~)i-0n zC^p{@;uNScI+WkZ6jY|`-r?D(6_{i5O`_T(UB0e)J^yuhHiXi?_>xq3fTD;_P?HdWCGhk2l-_Il(q_3bH>%&@NIm{3?l zBkc(B1s+*BJ9906M$$m?S0q5LfYs8I_hfabtd$>%AcWewP-I>mM#aV~0F)EQBYb*p z(mF=tp1XxWXUW)qzUUJ7{S2Q16DG4;)~{;o!S}wTgL1iWhFR-BROpd{Q-K~tfNs7j zLK0=NtyT0U+3YSYP$&{rqVPrOD()Xl5Q{xrh>da3%**N*N@PQ+weWOC!2E+Ps;O{1 z@iNl8_v6`@!B4R1b&YpHjVe>l>H9#;p1us~cAUtuAm@L9JNRq?;idE^U5|@}W2JX3 z0cgQ|%}Rd3KSUQDajK<29}#F_(bH?r?C>tcGk}q#zFl^1-QmO6S zuW)RE`6{KH`%C(7YQaNd9-Nkfwr>oCG+uhz@hM@va5CIq4qmFX z=q%$eLdJS)Pd#nIu#D#WLtWe4Q5Ls3}O4%s!d^z;tM6~v=AJZP^T7PDz=M|P z62388q@4}vi4G7qDJFKBWaYMww(BrQ>t5HsT^d2Do_@aUpP6+!i0JTa{(DNvvJYRs z`=7}hZg>UbJ4|2+^$F>ZgkJ=vEE4PN(tiv;+bKAhDArDvTGBS;l?Wn4c(dcrwzStc zg4aZ#phiq(Hel1hN#akXB>4U9rO!b~wC9*RP8~Y{>i)YWTRR!>>;||sBF8W6C;BAu zWe`x}u~wi^I%-nylYEjWdjsYOQ`JctSY>S*Hi-z|@&`ErVucAbCy4VRAvL}6^ZOqm zo~I9B>lUKJhHWe|v4R^OlkAhB#q<*-iJCo^bcylDceX z^!&(H6k7o1nHp8UK4WgWO!v-|IznO(0dR8AOI>KL(7r)c*4CRit*Y7O$g*_aSHjlt z9EL*@6x?SGn0%LjR!pKG`@y#S$n4znH)?BC!3SwTlwt$QwBrSFci{z0{fK5g&tewU z^t?#Xih6@jGnc=-!%hqq{bv)TN`pF;^5s46b#f?z64ck)cdk&DKXh zD9P1DZ`u$=XtfP$&H;Y~id}DB@)dG5i#pNt0?`n=_g?ojl|#s18v7fnD0HmE!x>5$ zsKObA*7@m;J!$rtgr!S5e3 
zs`~4Dw(jucV4ZW{L@GEk$G}g;R8tjkF5|XR?v?z}Z~mdRlnQ;@%8>{|Ba^X-v`NP{ z^+ItPivo`YAt&vs!`HA(F;Dyeuzta*^Av6`-6M@QCcNjTkchw!RX{|=>wzXs(BKW+ zye%_8w|__JTEf6PVq9sW#pB%;SLIdRdw#rl?y|@2wwGvaT54MU6n^iuQJlu%fYUvz zgv0_$jV&+g%1W|%0 z0d$$P4&zZs2XG|jBZ-Q;q8v)w|07)jB+jmIWI1WvZ-YSAb40KX_yFKVdWt>bK2krM z5_HvMsB&;Wd;Z-Em?0R_3!8f`3LGVEksg`W@=PzeCb+6df7#J7*4)Z3D3E!twxN>7 zm`e>eqk2|r2=%%l7W+n+hUiT-3*Zi2<;6P{PiaZ?JzsNap8x1QYmtg=^Ge&WPYXf| z$FCyAepNbJ@JlbQuV4e!$9-rUCX$$+iKEkcVlp%J~g+l@w?5|l-0 z+ZG|SZ0NiOzDWQV>O|w4&pT?`N$}gPGg;7AIFU`->5Wf?i!@5Jz&E=0Zz=J&95t-7 z4II>)$S)MAf0g46EzZqfXsVI4ke*B-Rj9zfz=_Z)(=dJE2geEPs z>sM9{v;sKtal&b0LqF<7HgOrbl6>BS6-yw__UfCz`q!b#MRu=^j&HLi#ZhmPdWdgU zk3BP)3q7t1OKSd-`DDtJNy<&Frm=pSGL^A6y74GKzv!(F%{{_8NP$v##fqlqB#EZ< zJ+_XQ>ws zoz2L2{*R9nR?9;#2!EC*v0nfWE}rOyTm@Qr!fww5eU?saM^mh>@KC8#$FFXoyv>Ey zi(O25&6mQZfVp+@yd?bHZ1btkInVZ_O$&he>m|P};|^tr1}g!~Dq#9>Ekvx13gyOT z?X|ePrb1LDQNW^3dcPWZ<_-1Y{dTFiKsS;#uX>Ph?GFSaoq_xeEPfHEAqDi3WmGrO zAVPPt7v~Pw=MkwxlU@^2=+07LHu}%`%fO;;JiYOuTXNm1jvgXYuK_yhmllX0k?D4< z_=G4}kc!U<;!Y|IyzbKfU?ofu&tL)mjFe^P;eetvD1Dj;rKHWSIM@|{55mSPM6zb`rfz9=7e70U7QW`T*xaR@pHx8zaxU4j2?*gKA^+h2colqo)k2q!HWZ~j$>NYYwn@DaC&W!_w zl}`w-p*gMWYTi}vW?mj}!vqcx4IiSsd(}vNiaHWE9oJ>`%`{f{>Y{@*LZ&22x7|3T z-4tkg{5_k7>GRKGo}SlFfJLj-BDx%#vhvehz8i8H%TB#`%-FaQUVn%LCk2ZBRQ?ay ziElrnz}1gT;i8z!mqI})T7?xB#UO6 zEkBCBqo$?cS4WrNEYparaNSyTK~Fj20FA*{wd;~fCh&&xkw*yrDML6m9N)wX6aM-* zg2*9!KiuL?V`HZdBlK{CEBT8rI)G*W5cRG!gLEWwUR5SC`GQ#D6_9NchJ&z5m4`K4 zCQ9NA*+#SrtHdfHssnbGY5OlE>j&sE=dnO6w=0rD4JoSdfUT0=7YdyfhyR}P!d38= zd)B-DF-ob={2#;qOkkEAL}9cT;J z6I8s)bVCiliReP6bR9=z03KK^mqk2iGm{2FB$Z1OK5PU3S9cgSG<1LlhSUO7>sznT zmDndL1c>vJ_%BK>Iw@xnDDlZ3c#sMCFbo?%Qh!pF{?9QC$BpaBMC*xUkJ^$mCIx&{sLe7tPKgJ5~im6PNE=; z{}w__p`jiYH7F>#;_p*Teq#+7OC-83IFSkA4^4L2?tt?njSE4UZM}TEc2*q^I;0lh z5m~#;eO{IVF(7HNld4m3TiC@9s7KU-itRkta-g17s>Xmb7Q|$taF>{qa+fC*u|Qf* zFPzwyc#OkWj*urcCz^9gcfO`8-q$Nx_JR8u^8E#qP`|-j-8_9DD+(MX8{IO_g#gW& z*?z6plBY4XRD-6QVMpzI!Lklu7s)H(H5y#3xqPwEVrw;hus365>kuN*upd;+z^r5x z7dwa3!rb_*@_w4N|Q)d^iLj35vuTYe_4ojxE 
zS|+(z@7A#G4rEeb&l5+>pxcdT*sRnv)IjIeEjOrPAL$s=CINUb{sBgi6o0E@QGTBA z5sNwuPsWHTzsdhX+->kjXcIF$L>uLCiZbcLk8;w8AdfgzbPUh6lG->e^->I5O8qZW z*FG_*B%?k))sPO=K66Lh5#1+WfCZo{c_ly@O0hr*4wyd!{6*4DkQdsdGq$BBK)v92 zmw>$5Q~iL>pU6La&8BMEunnJ0etEXa&fVw5_0L;_-3ee_|A)87kbM2aM?h^CT;;js zz?2w}Ql*Lsr%uuI){-ExVN6>JUF73l{3#wy!%@k>S$NvVejI#6*B~~5h!xKIev62- zC;_F@mo(tJHEy|w#)48l*NvQK+XZ{b1CYBYtu5yOK$JFBBlzzk{mF@NSaOf!(^6X^ zHye{Af@v*`{G~ztz>aQ6vy_H_?&A|ZATh>KcPcN3Gw`SJ|0d`gj4kT-yVFm2+eh4k z+QQ2f*3q65deQMms6Y6`TubdBgVKur%R8P-Kd{qur?QOQ+LoL5i-5_BMxNxiXGZ9> z#GlLtY&|v?WUZ`8X*S!KC*+nq4~2d7T&C3l@~Y?d7WCg70`o z4DaS`BJPvpm>49xkqiOo#ORvOWxoJW60mv1@kvvf?*BOz+#mXly^ZwAzh>)DM)kzv zNfk&XZ9bqi5J0_21oc@v#mtYgexWxLY+MMKfn`3uoXYsrBLLn(P|PyuT}Cu|C!H;_ z=3H!8bqdMTy8gXi@6$&PX0)q?v(d*BuE&N)*UTsPO2=G(mRba?i-EjYo!9hJd%iGQ)3LDv8qmmcN!_HNo2xI`M|j_n=E%>7kk*YbE7 z4GV)u_j?4`?YTPJvG>Ol^r)m?7^~^-8oBj56;cDv9*#)#CfPy^5w_AVZK%E9u{HYw zLW=4l8^Rnd%MK0y>blI6 z?Tdt#8Xyi;bUzapUozR9W_B2{- z5xh7&(37+%d)nzxgdNm)(+HOx7i#2VR2%Dzq)m-)BXQH|z`777V3+EQK1wi$KL=tf zLGDij=ohwm8nf(9`R33QjEgO*FBsi+B2bS49(0T>DKMQc#Zbc!2YRd3icY`;0&s53)F3uqYf&(%JxIbJM&Grs8m zZhWyB=ng1h661u{$;*C(JZENr{x6#4?-=VUH`wEiwrp8HkJBsrDt~S1it6eAV!yeL{to2(z zW2d=K{6U*>bt02K)v`JU4mp{Lp9`eu`$*)3G1qXkhC?2sTXR7$= z@y7d(XP}-~R29@^r_@C3!C6&xPyZ1Wv^v_ z_p6N9PY)M<6VsIN8Pxq(j%mup)44`oQDf3J3*2E{J2aiV25e1SvOGbxz_cwyo>^tx_Xk)$!@e0y1vopI4R8J+tl6DXoaQ5$c`q z=o+pDBp;(zI;d$cVJf*iSMP8s{sE@qYzyqJ)h_qZ7G6%U(X6F{XWv~p0-?Jq>;TyO z7*}H3cj|uXM!A^8fC;=R+y#q$x4_r_m{o~&Yw~s^RI66E+Zk@EVy#I2#WsCQECbx8 z*BdltkBaE0eI)ec7&_C6oOxDfR5M1pJu_{6D!ek+Fwt!G@kSs&w0^mHC$;!p&D(N| zp;i8%bTqS%JZ3o;7{`+=I(GG>s?jn{8TvwL`ID10hC!(@I{#~Ot>PP@{%h`QUn-oC zltC;At6Ep)d=zCADC%(m6;Z!vf-IBxhk9NA)pm;^V5p|H06$RdYGC8q1-*hKoeaa! 
z^RWrnXaE4cdZ&HJLot><78_j~8d5_Ls{nbtw#{$h>?s03QPSTu*eQO* zS06e>Q*|T?c4`28E3djg1QIv2Gb5nn(A)eBT53bGndeuz$hyn}1`wEB{GKjujwS@^ zJyp>#g$x17{zfA@i)^ou!Hnw?&F>Fbqx`;S;m?7_WVErxGwNb7Ku}XMUF&xC4O55a zAzHNbRRE=DKPCE5Yu{?|0@$1v?byIQ2*7)PiiBL7=D*u;r0lYIB6`Y&wR~IlT5z)b zxp3T$h+-&Aq#?n)bj#RpJI^2Fe@s7gXb?}~3q zh*izxI9-CEZ9m->`P%q;>YtvFzq)Gc+dSK>bxjmQ{oGtV6vwz&YE zRnp^(eNn#L{v+mcq(ya?*UPWD$Pzr%5(JyA_b`oAd{wn;pP!isIV?JoXQU^Ef)iN zGxTcsmecJ5=d-8}_A)9mVm#x**!|T1kV7eWT0aApmoL>X1|c3m5S^hRdotG!+FKT^ z#}<6cbwUA5Gzz=7AEm?U2MIXn5{~w`j#td_;EzwotyNPcMrTn^FZ+)Y^hAOIrcoFm zXIGF>a^IcrrXp43b=;Kekk(~&jRrF>cNDjUtG7IC1;+*9M4K1Ebv#Lkw16688jk zt6S|p(<_%A|2$%{I-{=h!6S~GREg=zwQ>Pz#sl7lW@}g*lMG-k%G?(gS8NFN^9Az0 z1ZJ|+3enl734pJmeGn6JO+NrpKrNX)!!or1$ue<8OHHb5x}8RP4`~3mW3Z>NcXE|K zYuEtqF*$sM3`#J*J#i?#gA>;foPRx`&SzlJnoEx*=k!Mk_ApJwrPxSsM8#a;0@K|C z0Czw0VqA})bmvWosC8EHJELnnN&fL3q{>B(UbJwUa5zuhdZIk#(YF9aVn0TgNs2BINhl6Abw zvKOg-^}!Z0&~dL_M$A=X4idoTwN9(S1g5Ph< zn^KpT0g*@tp_~Z{h$0jib;9d+dWpRR3ohi$(Zlz%N19K)^jG}c>T1M`FBr&=Rk`h9 zUgS3M7?8in^E43QQ>J5!5t>eaaOX%ItXpjdouLAk|C~cG$;a?uREb6boxSFCRx;vS zmEn!0__}r*iOl9r;hrAI67Iqwf=)S4|D=h;CZGx$ndb?=a#&V!P0z5j99zf8xO%wf zgCav%L%KAcsNk#SP)jti)iEep(?znm<6_woQahG)MT|hO`*wsSWn4|d!U%DyO28DM z5)<48z^vTxwH<{>>`rGizWTknd+(z6o<7{4Dw3x`j34mxSv~^*cj+u|@>w(ix5oLr z{z{6|(#Zb&FEP?|&ONwLc_)MzH?sqrv_Sn%Em{cZP`!tZC?PcMwm) z+i-&wWk=VU50puDc}7(BKTmn21Wr*gwC*Q}Sl7`E8BsnuMEzd8>IrO>_=)UUV}CT9 z)pLZLG|9WoG40vGm`!_cM2O?m1&+0f${n)X>GM1Y_)~8vy)u&#%eCg|D+Ke?4G*53 zG`pK$Y@a(?5;>$Ag}tb?+mUo229X_nThB1?FHeoRu`*A8a-l;@wv0*n0iddLd^30a z5AuHT8M1!bK5~CdK$<`4$G%)U>+Q}=a|jT>NU?rCnu=6c_d2HKb z(@s>-4sOUzd4{ffckT5O@6$gg_7uRVBQ>HFpxm>5O|j#(<7H4qI0>k*6EMOd!7eC0 z-j9OXbhz&N^;s7!T zBt*WfVm33|+ewnJvxy|&vMs4b5gKXHbPvz@T~gLgXa|@z2Ct2cKXcmjKqYUn8{8M^ z8a%JuSHk{?GmUmz`)S-|h*Z$=3N<GD9@XvVL?NG8odFw==WpxD~K_@?NqY=aKqpz043JB z{HT=OOWR4hG3^x)YXHgpzRo5O61N8=2V*BbSf(*{qd8B+8nBn4gH1`W!#4jKTG(hY zY(!fm0tk~o-Ru}U%UVlyYS4;c9dTv9ft;Qqsu1P{)x3F;r6`_Zm&(IkklO*w^4n2A z_k^n&4KF+!xzS@i8zKKtV5EMfX0(UJgbRlvc|$686E1~5D;@MFOHnGo?DyC2AhxY_ 
zV#5~9Iw<(V&Qj9+P; zXTwK7y*}t{+A?#1doK1e{HTLiJn6j@yi$_~x~B5xvyM|?!*Z~WMw9KAKn~=iY5=9_ zAW)VYNu8E*q9MP${Q8SAM*LNYJ_5jW2`G?OEeLk1gT=`I8SMvxOglbKdy?5DNMnts z@gYSu-vn$Qfe}UPyz=UZ&gW+eGdUVc?ILuu@Ij%&k?ZNaYgA4fKEX`1ygrvjb|@dL z1qe_hQ{e+;!$J0I7X8&wNSx8>J_Ut7M27I4IQ5|yREBtlV)Dv z$`GlIx%ITo(6l*M+$fz+%4b=3LJntM;)7WJU;0ssm%V%T>A>ku!wJ_Wemai6Y_Q8Y9{V2 z3GZvA7KV8dYshzF-6u5eg=gu^S^@r2-akY=k)812QQEViMuZEFOo%UWqnCcBedIm` zL|7XulnH19H#QMwSt-@yDP4sdA(!3U0LrXI-w>NbF%rC zn^io5T)5p6 zoKrpz#!y@asv(vmd8e2^;pD>htSVZQr$6S;o62R5(s8$!gc23J6q6eIaXq3kV=w9In_WJXfCfB2J>|PmvoXL~YMVJ6Xmn(iKl^j^ij&!B``XCRB z$Wu`_+hUAe6;5Q>fIS1*A0C>>zT;?@em~M5cPHy5AFaM`K}}A&3yjXgQY5M|B);42 z{eVRtsv@OABFSRc|8ZAAR3Jh=n)~@uA|u90qUG9ey+}Y?A%nt`?MKD~=9PS&*~t`}8Xn)t_wRpgrhHG7 zrVMYnmagC6B$fxFLi9n{Kxe_WsJR8<LOTX_D3<6R=^hl@9CBhutHP1v#&(`$s)B^f^45oRKnR zH&W9mYCP7Uq(fs4?26VoCWpgly`!zVJd8OoO!Q;)%7Gz+8?1nV`W>!cuT|;s*`6qj zsd`;YR|6Q=4UqOPuz*;=s{&v((dX9hA%F~%XNlBqd}s3?7JPi?s|Y&TByHUln=#oC zV{qD7ooRyYd+5=JguzP(&N}@iPoCRg&m*lzM|a_FZGFi)PmN4G;cCygJyGuD{gjUQ zzo%AvQLfW3%HMD!9U3sA{YQ8G%(@>Q-m{6q?bxq}tf|ain4;i<8T}Ayl>4+Lz}o4n znJw1+=DK1ww|%0|(NbmE&&L??+l=CQXj=bjN6-HzaVlgLM&*74Dz;~AJH7_ygmWu% zomeIb$$$ODasOu6|D*T@nqPli1phrP;IGHvzhw9S?sES}jQ;=hax+vzidKB{vL5lj z{Jg4M16esRtdc1&DSP9fz!$LzcLBrOGYtdBTCn zFA!~@+j8dm8y9)48+Z9TQQ<$?m;bMyu!~jzTs21Y8z2FM6EPaOtGk5< zyx|GnB23*Mfndwu;DPJWLMCv&sR}(W_LtLBQc%EJAB90;7W~?OC(eK2Iu_lUX{IG= zqKhjzi+nsQ`6LYbF;l7KhVcNh&KWy+9xTHIOy(jNzsmdUvOj%u_5?@3NZy1y{{|BT Pe0-9S7q9xDANaojyAl%a literal 0 HcmV?d00001 diff --git a/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/studio-image/README.md b/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/studio-image/README.md new file mode 100644 index 0000000000..76083a1c26 --- /dev/null +++ b/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/studio-image/README.md @@ -0,0 +1,67 @@ +## build_image.sh + +This script allows you to create a custom docker image and push on ECR + +Parameters: +* IMAGE_NAME: 
*Mandatory* - Name of the folder for the image you want to build +* REGISTRY_NAME: *Mandatory* - Name of the ECR repository you want to use for pushing the image +* IMAGE_TAG: *Mandatory* - Tag to apply to the ECR image +* DOCKER_FILE: *Mandatory* - Dockerfile to build +* PLATFORM: *Optional* - Target platform (for example `linux/amd64`) on which the image will run +``` +./build_image.sh IMAGE_NAME REGISTRY_NAME IMAGE_TAG DOCKER_FILE [PLATFORM] +``` + +Examples: + +``` +./build_image.sh image_tensorrt nvidia-tensorrt-21.08 latest Dockerfile linux/amd64 +``` + +## create_studio_image.sh + +This script allows you to create the Amazon SageMaker Studio Image + +Parameters: +* IMAGE_NAME: *Mandatory* - Name of the folder for the image +* REGISTRY_NAME: *Mandatory* - Name of the ECR repository where image is stored +* SM_IMAGE_NAME: *Mandatory* - Name of the image you want to create +* ROLE_ARN: *Mandatory* - Used to get ECR image information when an Image version is created + +``` +./create_studio_image.sh IMAGE_NAME REGISTRY_NAME SM_IMAGE_NAME ROLE_ARN +``` + +Examples: + +``` +./create_studio_image.sh image_tensorrt nvidia-tensorrt-21.08 nvidia-tensorrt-21-08 arn:aws:iam::ACCOUNT_ID:role/mlops-sagemaker-execution-role +``` + +## update_studio_image.sh + +This script allows you to create a new version of an existing Amazon SageMaker Studio Image + +Parameters: +* IMAGE_NAME: *Mandatory* - Name of the folder for the image +* REGISTRY_NAME: *Mandatory* - Name of the ECR repository where image is stored +* SM_IMAGE_NAME: *Mandatory* - Name of the image you want to update +* ROLE_ARN: *Mandatory* - Used to get ECR image information when an Image version is created + +``` +./update_studio_image.sh IMAGE_NAME REGISTRY_NAME SM_IMAGE_NAME ROLE_ARN +``` + +Examples: + +``` +./update_studio_image.sh image_tensorrt nvidia-tensorrt-21.08 nvidia-tensorrt-21-08 arn:aws:iam::ACCOUNT_ID:role/mlops-sagemaker-execution-role +``` + +## update_studio_domain.sh + +This script allows you to attach the custom image to the Amazon SageMaker Studio domain described in `studio-domain-config.json` + +``` +./update_studio_domain.sh +``` \ No newline at end of file diff --git a/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/studio-image/build_image.sh
b/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/studio-image/build_image.sh
new file mode 100755
index 0000000000..e532b7bc21
--- /dev/null
+++ b/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/studio-image/build_image.sh
@@ -0,0 +1,69 @@
+#!/bin/sh
+#
+# Build a Docker image and push it to Amazon ECR.
+#
+# Usage: ./build_image.sh IMAGE_NAME REGISTRY_NAME IMAGE_TAG DOCKER_FILE [PLATFORM]
+#   IMAGE_NAME     folder holding the Dockerfile; it becomes the build context
+#   REGISTRY_NAME  ECR repository to push to (created when it does not exist)
+#   IMAGE_TAG      tag applied to the pushed image
+#   DOCKER_FILE    Dockerfile to build, relative to IMAGE_NAME
+#   PLATFORM       optional target platform, e.g. linux/amd64
+
+repo=$1
+registry_name=$2
+image_tag=$3
+docker_file=$4
+platforms=$5
+
+if [ -z "${repo}" ] || [ -z "${registry_name}" ] || [ -z "${image_tag}" ] || [ -z "${docker_file}" ]
+then
+    echo "[ERROR]: usage: $0 IMAGE_NAME REGISTRY_NAME IMAGE_TAG DOCKER_FILE [PLATFORM]" >&2
+    exit 1
+fi
+
+echo "[INFO]: registry_name=${registry_name}"
+echo "[INFO]: image_tag=${image_tag}"
+echo "[INFO]: docker_file=${docker_file}"
+echo "[INFO]: platforms=${platforms}"
+
+# The image folder is the build context: stop if it is missing instead of
+# building from whatever directory the script was started in.
+cd "${repo}" || exit 1
+
+account=$(aws sts get-caller-identity --query Account --output text) || exit 1
+
+# Get the region defined in the current configuration. `aws configure get`
+# only reads the config file, so fall back to AWS_DEFAULT_REGION and finally
+# to us-west-2 if none is defined.
+region=$(aws configure get region)
+region=${region:-${AWS_DEFAULT_REGION:-us-west-2}}
+
+echo "[INFO]: Region ${region}"
+
+registry="${account}.dkr.ecr.${region}.amazonaws.com"
+fullname="${registry}/${registry_name}:${image_tag}"
+
+echo "[INFO]: Image name: ${fullname}"
+
+# If the repository doesn't exist in ECR, create it.
+if ! aws ecr describe-repositories --region "${region}" --repository-names "${registry_name}" > /dev/null 2>&1
+then
+    aws ecr create-repository --region "${region}" --repository-name "${registry_name}" > /dev/null || exit 1
+fi
+
+## If you are extending Amazon SageMaker Images, you need to login to the account
+# Feed the ECR password to docker on stdin so that it never shows up in the
+# process list.
+aws ecr get-login-password --region "${region}" \
+    | docker login --username AWS --password-stdin "${registry}" \
+    || exit 1
+
+if [ -z "${platforms}" ]
+then
+    docker build -t "${fullname}" -f "${docker_file}" . || exit 1
+else
+    echo "Provided platform = ${platforms}"
+    docker build -t "${fullname}" -f "${docker_file}" --platform="${platforms}" . || exit 1
+fi
+
+docker push "${fullname}"
\ No newline at end of file
diff --git a/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/studio-image/create_studio_image.sh b/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/studio-image/create_studio_image.sh
new file mode 100755
index 0000000000..80b868b37e
--- /dev/null
+++ b/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/studio-image/create_studio_image.sh
@@ -0,0 +1,71 @@
+#!/bin/sh
+#
+# Create an Amazon SageMaker Studio image, register its first image version
+# from ECR and (re)create the matching app image config.
+#
+# Usage: ./create_studio_image.sh IMAGE_NAME REGISTRY_NAME SM_IMAGE_NAME ROLE_ARN [IMAGE_TAG]
+#   IMAGE_NAME     folder holding app-image-config.json
+#   REGISTRY_NAME  ECR repository where the image is stored
+#   SM_IMAGE_NAME  name of the SageMaker image to create
+#   ROLE_ARN       IAM role handed to `aws sagemaker create-image`
+#   IMAGE_TAG      optional ECR tag to register (default: latest)
+
+IMAGE_NAME=$1
+REGISTRY_NAME=$2
+SM_IMAGE_NAME=$3
+ROLE_ARN=$4
+IMAGE_TAG=${5:-latest}
+
+# Abort with a message on stderr when a mandatory argument is empty.
+#   $1 - argument name, $2 - its value
+require() {
+    if [ -z "$2" ]
+    then
+        echo "[ERROR]: $1 not passed" >&2
+        exit 1
+    fi
+}
+
+require IMAGE_NAME "${IMAGE_NAME}"
+require REGISTRY_NAME "${REGISTRY_NAME}"
+require SM_IMAGE_NAME "${SM_IMAGE_NAME}"
+require ROLE_ARN "${ROLE_ARN}"
+
+echo "[INFO]: IMAGE_NAME=${IMAGE_NAME}"
+echo "[INFO]: REGISTRY_NAME=${REGISTRY_NAME}"
+echo "[INFO]: SM_IMAGE_NAME=${SM_IMAGE_NAME}"
+echo "[INFO]: ROLE_ARN=${ROLE_ARN}"
+echo "[INFO]: IMAGE_TAG=${IMAGE_TAG}"
+
+aws sagemaker create-image \
+    --image-name "${SM_IMAGE_NAME}" \
+    --role-arn "${ROLE_ARN}" \
+    || exit 1
+
+account=$(aws sts get-caller-identity --query Account --output text) || exit 1
+
+# `aws configure get` only reads the config file, so also honour
+# AWS_DEFAULT_REGION; an empty region would yield an invalid image URI.
+region=$(aws configure get region)
+region=${region:-${AWS_DEFAULT_REGION:-}}
+if [ -z "${region}" ]
+then
+    echo "[ERROR]: no AWS region configured" >&2
+    exit 1
+fi
+
+aws sagemaker create-image-version \
+    --image-name "${SM_IMAGE_NAME}" \
+    --base-image "${account}.dkr.ecr.${region}.amazonaws.com/${REGISTRY_NAME}:${IMAGE_TAG}" \
+    || exit 1
+
+# Best effort: remove a config left over from an earlier run so that the
+# create below does not collide with it. The delete reports an error when
+# there is nothing to remove; that is expected and not fatal.
+aws sagemaker delete-app-image-config --app-image-config-name "${SM_IMAGE_NAME}-config"
+
+aws sagemaker describe-image-version --image-name "${SM_IMAGE_NAME}"
+
+# NOTE: the config name comes from the JSON file, not from SM_IMAGE_NAME;
+# keep its AppImageConfigName equal to "${SM_IMAGE_NAME}-config".
+aws sagemaker create-app-image-config --cli-input-json "file://${IMAGE_NAME}/app-image-config.json"
\ No newline at end of file
diff --git a/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/studio-image/image_tensorrt/Dockerfile
b/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/studio-image/image_tensorrt/Dockerfile
new file mode 100644
index 0000000000..eed1a58ccc
--- /dev/null
+++ b/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/studio-image/image_tensorrt/Dockerfile
@@ -0,0 +1,9 @@
+# Custom SageMaker Studio kernel image built on the NVIDIA PyTorch container.
+FROM nvcr.io/nvidia/pytorch:21.08-py3
+
+# --no-cache-dir keeps pip's download cache out of the image layer; the extras
+# spec is quoted so the shell never treats [all] as a glob pattern.
+RUN pip install --no-cache-dir sagemaker transformers==4.9.1 "tritonclient[all]"
+
+# Install the IPython kernel spec into the image's Python prefix
+RUN pip install --no-cache-dir ipykernel && python -m ipykernel install --sys-prefix
\ No newline at end of file
diff --git a/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/studio-image/image_tensorrt/app-image-config.json b/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/studio-image/image_tensorrt/app-image-config.json
new file mode 100644
index 0000000000..afe58441a9
--- /dev/null
+++ b/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/studio-image/image_tensorrt/app-image-config.json
@@ -0,0 +1,16 @@
+{
+    "AppImageConfigName": "nvidia-tensorrt-21-08-config",
+    "KernelGatewayImageConfig": {
+        "KernelSpecs": [
+            {
+                "Name": "python3",
+                "DisplayName": "Python3"
+            }
+        ],
+        "FileSystemConfig": {
+            "MountPath": "/root",
+            "DefaultUid": 0,
+            "DefaultGid": 0
+        }
+    }
+}
\ No newline at end of file
diff --git a/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/studio-image/studio-domain-config.json b/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/studio-image/studio-domain-config.json
new file mode 100644
index 0000000000..73df5f3928
--- /dev/null
+++ b/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/studio-image/studio-domain-config.json
@@ -0,0 +1,13 @@
+{
+    "DomainId": "$DOMAIN_ID",
+    "DefaultUserSettings": {
+        "KernelGatewayAppSettings": {
+            "CustomImages": [
+                {
+                    "ImageName": "nvidia-tensorrt-21-08",
+                    "AppImageConfigName": "nvidia-tensorrt-21-08-config"
+                }
+            ]
+        }
+    }
+}
\ No newline at end of file
diff --git
a/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/studio-image/update_studio_domain.sh b/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/studio-image/update_studio_domain.sh
new file mode 100755
index 0000000000..dde7dbe5a2
--- /dev/null
+++ b/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/studio-image/update_studio_domain.sh
@@ -0,0 +1,34 @@
+#!/bin/sh
+#
+# Attach the custom image listed in studio-domain-config.json to a SageMaker
+# Studio domain.
+#
+# Usage: DOMAIN_ID=d-xxxxxxxxxxxx ./update_studio_domain.sh
+#
+# The JSON file carries the literal placeholder "$DOMAIN_ID". The AWS CLI does
+# not expand shell variables inside --cli-input-json files, so the placeholder
+# is substituted here. A file that was edited by hand to hold a real domain id
+# is passed through unchanged.
+
+config_file="studio-domain-config.json"
+
+echo "${config_file}"
+
+if grep -q '\$DOMAIN_ID' "${config_file}"
+then
+    # The value is spliced into a sed replacement, so accept only the
+    # characters a domain id is made of.
+    case "${DOMAIN_ID}" in
+        "" | *[!A-Za-z0-9-]*)
+            echo "[ERROR]: set DOMAIN_ID to the Studio domain id (letters, digits, '-')" >&2
+            exit 1
+            ;;
+    esac
+
+    rendered=$(mktemp) || exit 1
+    trap 'rm -f "${rendered}"' EXIT
+    sed 's/\$DOMAIN_ID/'"${DOMAIN_ID}"'/g' "${config_file}" > "${rendered}" || exit 1
+    config_file=${rendered}
+fi
+
+aws sagemaker update-domain --cli-input-json "file://${config_file}"
\ No newline at end of file
diff --git a/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/studio-image/update_studio_image.sh b/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/studio-image/update_studio_image.sh
new file mode 100755
index 0000000000..36d860b07c
--- /dev/null
+++ b/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/studio-image/update_studio_image.sh
@@ -0,0 +1,66 @@
+#!/bin/sh
+#
+# Register a new version of an existing Amazon SageMaker Studio image and
+# re-create the matching app image config.
+#
+# Usage: ./update_studio_image.sh IMAGE_NAME REGISTRY_NAME SM_IMAGE_NAME ROLE_ARN [IMAGE_TAG]
+#   IMAGE_NAME     folder holding app-image-config.json
+#   REGISTRY_NAME  ECR repository where the image is stored
+#   SM_IMAGE_NAME  name of the SageMaker image to update
+#   ROLE_ARN       still mandatory so existing callers keep working, but only
+#                  echoed: no command below consumes it
+#   IMAGE_TAG      optional ECR tag to register (default: latest)
+
+IMAGE_NAME=$1
+REGISTRY_NAME=$2
+SM_IMAGE_NAME=$3
+ROLE_ARN=$4
+IMAGE_TAG=${5:-latest}
+
+# Abort with a message on stderr when a mandatory argument is empty.
+#   $1 - argument name, $2 - its value
+require() {
+    if [ -z "$2" ]
+    then
+        echo "[ERROR]: $1 not passed" >&2
+        exit 1
+    fi
+}
+
+require IMAGE_NAME "${IMAGE_NAME}"
+require REGISTRY_NAME "${REGISTRY_NAME}"
+require SM_IMAGE_NAME "${SM_IMAGE_NAME}"
+require ROLE_ARN "${ROLE_ARN}"
+
+echo "[INFO]: IMAGE_NAME=${IMAGE_NAME}"
+echo "[INFO]: REGISTRY_NAME=${REGISTRY_NAME}"
+echo "[INFO]: SM_IMAGE_NAME=${SM_IMAGE_NAME}"
+echo "[INFO]: ROLE_ARN=${ROLE_ARN}"
+echo "[INFO]: IMAGE_TAG=${IMAGE_TAG}"
+
+account=$(aws sts get-caller-identity --query Account --output text) || exit 1
+
+# `aws configure get` only reads the config file, so also honour
+# AWS_DEFAULT_REGION; an empty region would yield an invalid image URI.
+region=$(aws configure get region)
+region=${region:-${AWS_DEFAULT_REGION:-}}
+if [ -z "${region}" ]
+then
+    echo "[ERROR]: no AWS region configured" >&2
+    exit 1
+fi
+
+aws sagemaker create-image-version \
+    --image-name "${SM_IMAGE_NAME}" \
+    --base-image "${account}.dkr.ecr.${region}.amazonaws.com/${REGISTRY_NAME}:${IMAGE_TAG}" \
+    || exit 1
+
+# Drop the current config so that the create below can replace it. Not fatal:
+# the delete only reports an error when there is nothing to remove.
+aws sagemaker delete-app-image-config --app-image-config-name "${SM_IMAGE_NAME}-config"
+
+aws sagemaker describe-image-version --image-name "${SM_IMAGE_NAME}"
+
+# NOTE: the config name comes from the JSON file, not from SM_IMAGE_NAME;
+# keep its AppImageConfigName equal to "${SM_IMAGE_NAME}-config".
+aws sagemaker create-app-image-config --cli-input-json "file://${IMAGE_NAME}/app-image-config.json"
\ No newline at end of file