Merge branch 'main' into dependabot/pip/training/heterogeneous-cluste…

…rs/tf.data.service.sagemaker/code/protobuf-3.20.2
aws · Oct 12, 2022 · 172262a · 172262a
2 parents 6503a3d + 4c834f0
commit 172262a
Show file tree

Hide file tree

Showing 55 changed files with 4,403 additions and 19 deletions.
diff --git a/advanced_functionality/index.rst b/advanced_functionality/index.rst
@@ -0,0 +1,103 @@
+Advanced Functionality
+=================================
+
+.. raw:: html
+
+    <div style="position: relative; padding-bottom: 5%; height: 0; overflow: hidden; max-width: 100%; height: auto;">
+        <iframe width="560" height="315" src="https://www.youtube.com/embed/wiDHCWVrjCU" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
+    </div>
+
+
+Advanced Algorithms
+-------------------------------------------
+
+.. toctree::
+   :maxdepth: 1
+
+   pytorch_bring_your_own_gan/build_gan_with_pytorch
+   pytorch_deploy_pretrained_bert_model/pytorch_deploy_pretrained_bert_model
+   mxnet_mnist_byom/mxnet_mnist
+
+
+
+Parallelism with Data Distribution
+----------------------------------
+
+.. toctree::
+   :maxdepth: 1
+
+   data_distribution_types/data_distribution_types
+
+
+Multi-Model Endpoints
+-------------------------
+
+.. toctree::
+   :maxdepth: 1
+
+   multi_model_pytorch/pytorch_multi_model_endpoint
+   multi_model_catboost/multi_model_catboost
+
+Prebuilt Deep Learning Containers
+---------------------------------
+
+.. toctree::
+   :maxdepth: 1
+
+   autogluon-tabular-containers/AutoGluon_Tabular_SageMaker_Containers
+
+
+SageMaker Pipeline
+--------------------------------
+
+.. toctree::
+   :maxdepth: 1
+
+   autogluon-sagemaker-pipeline/sagemaker-pipelines-project
+
+
+Multi Container Endpoint
+--------------------------------
+
+.. toctree::
+   :maxdepth: 1
+
+   multi-container-endpoint/direct-invocation/multi-container-direct-invocation
+
+
+Bring Your Own Algorithm Container
+----------------------------------
+
+.. toctree::
+   :maxdepth: 1
+
+   multi_model_bring_your_own/multi_model_endpoint_bring_your_own
+
+
+Bring Your Own Pipe-Mode Algorithm
+-------------------------------------
+
+.. toctree::
+   :maxdepth: 1
+
+   pipe_bring_your_own/pipe_bring_your_own
+
+
+Bring Your Own Model
+-------------------------------------
+
+.. toctree::
+   :maxdepth: 1
+
+   scikit_learn_bring_your_own_model/scikit_learn_bring_your_own_model
+
+
+Causal Inference Container
+-------------------------------------
+
+.. toctree::
+   :maxdepth: 1
+
+   causal-inference/causal-inference-container
+
+
diff --git a/autopilot/index.rst b/autopilot/index.rst
@@ -9,6 +9,9 @@ Get started with Autopilot
    sagemaker_autopilot_direct_marketing
    sagemaker_autopilot_abalone_parquet_input
    sagemaker_autopilot_neo4j_portfolio_churn
+   autopilot_customer_churn_high_level_with_evaluation
+   autopilot_california_housing
+   autopilot_customer_churn
 
 
 Feature selection

diff --git a/aws_sagemaker_studio/index.rst b/aws_sagemaker_studio/index.rst
@@ -19,8 +19,22 @@ Basic training example for SageMaker Studio
    :maxdepth: 1
 
    sagemaker_algorithms/linear_learner_mnist/linear_learner_mnist
+   getting_started/xgboost_customer_churn_studio
 
 
+SageMaker-Debugger
+-------------------------------------------
+
+.. toctree::
+   :maxdepth: 1
+
+   sagemaker_debugger/mnist_tensor_plot/mnist-tensor-plot
+   sagemaker_debugger/mxnet_realtime_analysis/mxnet-realtime-analysis
+   sagemaker_debugger/tensorflow_builtin_rule/tf-mnist-builtin-rule
+   sagemaker_debugger/pytorch_profiling/pt-resnet-profiling-single-gpu-single-node
+   sagemaker_debugger/tensorflow_action_on_rule/detect_stalled_training_job_and_actions
+   sagemaker_debugger/tensorflow2/tensorflow2_keras_custom_container/tf2-keras-custom-container
+
 
 Framework examples
 =======================
@@ -32,6 +46,7 @@ Framework examples
    frameworks/mxnet_mnist/mxnet_mnist_with_batch_transform
    frameworks/mxnet_onnx_ei/mxnet_onnx_ei
    frameworks/pytorch_cnn_cifar10/pytorch_cnn_cifar10
+   frameworks/tensorflow_mnist/tensorflow_mnist
 
 
 Model compilation with Neo

diff --git a/frameworks/index.rst b/frameworks/index.rst
@@ -0,0 +1,7 @@
+Getting Started with MXNet and Gluon
+=====================================
+
+.. toctree::
+   :maxdepth: 1
+
+   frameworks/mxnet/get_started_mnist_train
diff --git a/index.rst b/index.rst
@@ -105,6 +105,11 @@ More examples
    aws_sagemaker_studio/index
    sagemaker-lineage/index
 
+.. toctree::
+   :maxdepth: 1
+   :caption: Introduction to Amazon Algorithms
+
+   introduction_to_amazon_algorithms/index
 
 .. toctree::
    :maxdepth: 1
@@ -192,7 +197,7 @@ More examples
    :caption: Inference
 
    inference/index
-
+   model-governance/index
 
 .. toctree::
    :maxdepth: 1
@@ -203,6 +208,12 @@ More examples
    sagemaker-spark/index
    step-functions-data-science-sdk/index
 
+.. toctree::
+   :maxdepth: 1
+   :caption: Advanced Functionality
+
+   advanced_functionality/index
+   serverless-inference/index
 
 .. toctree::
    :maxdepth: 1
@@ -212,15 +223,8 @@ More examples
    scientific_details_of_algorithms/index
    aws_marketplace/index
 
-
-
 .. toctree::
    :maxdepth: 1
    :caption: Community examples
 
    contrib/index
-
-
-
-
-
diff --git a/inference/index.rst b/inference/index.rst
@@ -62,6 +62,18 @@ Elastic inference
    ../sagemaker-python-sdk/mxnet_onnx_eia/mxnet_onnx_eia
 
 
+Inference-Recommender
+----------------------
+
+.. toctree::
+   :maxdepth: 1
+
+   ../sagemaker-inference-recommender/inference-recommender
+   ../sagemaker-inference-recommender/xgboost/xgboost-inference-recommender
+   ../sagemaker-inference-recommender/sklearn-inference-recommender/sklearn-inference-recommender
+   ../sagemaker-inference-recommender/tensorflow-cloudwatch/tf-cloudwatch-inference-recommender
+   ../sagemaker-inference-recommender/huggingface-inference-recommender/huggingface-inference-recommender
+
 Endpoints
 ---------
 

diff --git a/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/README.md b/inference/nlp/realtime/huggingface/sentence-transformers-triton-ensemble/README.md
@@ -0,0 +1,38 @@
+# NVIDIA Triton Inference Server on SageMaker - Hugging Face Sentence Transformers
+
+## Introduction
+
+[HuggingFace Sentence Transformers](https://huggingface.co/sentence-transformers) is a Machine Learning (ML) framework and set of pre-trained models to
+extract embeddings from sentences, text, and images. The models in this group can also be used with the default methods exposed through the [Transformers](https://github.com/huggingface/transformers) library.
+
+[NVIDIA Triton Inference Server](https://github.com/triton-inference-server/server/) is a high-performance ML model server, which enables the deployment of ML models in an easy, scalable, and cost-effective way. It also exposes many easy-to-use optimization features to make the most of the underlying hardware, in particular NVIDIA GPUs.
+
+In this example, we walk through how you can:
+* Create an Amazon SageMaker Studio image based on the official [NVIDIA PyTorch](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch) image, which includes the necessary dependencies to optimize your model
+* Optimize a pre-trained HuggingFace Sentence Transformers model with NVIDIA TensorRT to enable high-performance inference
+* Create a Triton Model Ensemble, which will allow you to run in sequence a pre-processing step (input tokenization), model inference and post-processing, where sentence embeddings are computed from the raw token embeddings 
+
+This example is meant to serve as a basis for use-cases in which you need to run your own code before and/or after your model, allowing you to optimize the bulk of the computation (the model) using tools such as TensorRT.
+
+<img src="images/triton-ensemble.png" alt="Triton Model Ensemble" />
+
+#### ! Important: The example provided can also be tested using Amazon SageMaker Notebook Instances
+
+### Prerequisites
+
+1. An NVIDIA NGC account is required. Follow the instructions in the [NGC Catalog User Guide](https://docs.nvidia.com/ngc/ngc-catalog-user-guide/index.html#registering-activating-ngc-account) to register and activate one.
+
+##  Step 1: Clone this repository
+
+##  Step 2: Build Studio image
+
+In this example, we provide a [Dockerfile](./studio-image/image_tensorrt/Dockerfile) example to build a custom image for SageMaker Studio.
+
+To build the image, push it and make it available in your Amazon SageMaker Studio environment, edit [sagemaker-studio-config](./studio-image/studio-domain-config.json) by replacing `$DOMAIN_ID` with your Studio domain ID.
+
+We also provide automation scripts in order to [build and push](./studio-image/build_image.sh) your docker image to an ECR repository
+and [create](./studio-image/create_studio_image.sh) or [update](./studio-image/update_studio_image.sh) an Amazon SageMaker Image. Please follow the instructions in the [README](./studio-image/README.md) for additional info on the usage of these scripts.
+
+## Step 3: Compile model, create an Amazon SageMaker Real-Time Endpoint with NVIDIA Triton Inference Server
+
+Clone this repository into your Amazon SageMaker Studio environment and execute the cells in the [notebook](./examples/triton_sentence_embeddings.ipynb)
diff --git a/...gingface/sentence-transformers-triton-ensemble/examples/ensemble_hf/bert-trt/config.pbtxt b/...gingface/sentence-transformers-triton-ensemble/examples/ensemble_hf/bert-trt/config.pbtxt
@@ -0,0 +1,32 @@
+name: "bert-trt"
+platform: "tensorrt_plan"
+max_batch_size: 16
+input [
+  {
+    name: "token_ids"
+    data_type: TYPE_INT32
+    dims: [128]
+  },
+  {
+    name: "attn_mask"
+    data_type: TYPE_INT32
+    dims: [128]
+  }
+]
+output [
+  {
+    name: "output"
+    data_type: TYPE_FP32
+    dims: [128, 384]
+  },
+  {
+    name: "854"
+    data_type: TYPE_FP32
+    dims: [384]
+  }
+]
+instance_group [
+    {
+      kind: KIND_GPU
+    }
+  ]
diff --git a/...sentence-transformers-triton-ensemble/examples/ensemble_hf/ensemble/1/README.md b/...sentence-transformers-triton-ensemble/examples/ensemble_hf/ensemble/1/README.md
@@ -0,0 +1 @@
+Do not delete me!
diff --git a/...gingface/sentence-transformers-triton-ensemble/examples/ensemble_hf/ensemble/config.pbtxt b/...gingface/sentence-transformers-triton-ensemble/examples/ensemble_hf/ensemble/config.pbtxt
@@ -0,0 +1,70 @@
+name: "ensemble"
+platform: "ensemble"
+max_batch_size: 16
+input [
+  {
+    name: "INPUT0"
+    data_type: TYPE_STRING
+    dims: [ 1 ]
+  }
+]
+output [
+  {
+    name: "finaloutput"
+    data_type: TYPE_FP32
+    dims: [384]
+  }
+]
+ensemble_scheduling {
+  step [
+    {
+      model_name: "preprocess"
+      model_version: -1
+      input_map {
+        key: "INPUT0"
+        value: "INPUT0"
+      }
+      output_map {
+        key: "OUTPUT0"
+        value: "token_ids"
+      }
+      output_map {
+        key: "OUTPUT1"
+        value: "attn_mask"
+      }
+    },
+    {
+      model_name: "bert-trt"
+      model_version: -1
+      input_map {
+        key: "token_ids"
+        value: "token_ids"
+      }
+      input_map {
+        key: "attn_mask"
+        value: "attn_mask"
+      }
+      output_map {
+        key: "output"
+        value: "output"
+      }
+    },
+    {
+      model_name: "postprocess"
+      model_version: -1
+      input_map {
+        key: "TOKEN_EMBEDS_POST"
+        value: "output"
+      }
+      input_map {
+        key: "ATTENTION_POST"
+        value: "attn_mask"
+      }
+      output_map {
+        key: "SENT_EMBED"
+        value: "finaloutput"
+      }
+
+    }
+  ]
+}