Skip to content

Commit

Permalink
updated sagemaker triton to v22.09
Browse files Browse the repository at this point in the history
  • Loading branch information
vikramelango committed Oct 25, 2022
1 parent 2438cff commit 455f54f
Showing 1 changed file with 10 additions and 11 deletions.
21 changes: 10 additions & 11 deletions multi-model-endpoints/mme-on-gpu/cv/resnet50_mme_with_gpu.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@
"runtime_sm_client = boto3.client(\"sagemaker-runtime\")\n",
"sagemaker_session = sagemaker.Session(boto_session=boto3.Session())\n",
"bucket = sagemaker_session.default_bucket()\n",
"prefix = \"resnet-mme-gpu\"\n",
"prefix = \"resnet-mme-gpu-v1\"\n",
"\n",
"# endpoint variables\n",
"sm_model_name = f\"{prefix}-mdl-{ts}\"\n",
Expand Down Expand Up @@ -159,7 +159,7 @@
"\n",
"base = \"amazonaws.com.cn\" if region.startswith(\"cn-\") else \"amazonaws.com\"\n",
"mme_triton_image_uri = (\n",
" \"{account_id}.dkr.ecr.{region}.{base}/sagemaker-tritonserver:22.07-py3\".format(\n",
" \"{account_id}.dkr.ecr.{region}.{base}/sagemaker-tritonserver:22.09-py3\".format(\n",
" account_id=account_id_map[region], region=region, base=base\n",
" )\n",
")"
Expand Down Expand Up @@ -200,7 +200,7 @@
"outputs": [],
"source": [
"!docker run --gpus=all --rm -it \\\n",
" -v `pwd`/workspace:/workspace nvcr.io/nvidia/pytorch:22.07-py3 \\\n",
" -v `pwd`/workspace:/workspace nvcr.io/nvidia/pytorch:22.09-py3 \\\n",
" /bin/bash generate_model_pytorch.sh"
]
},
Expand Down Expand Up @@ -289,7 +289,7 @@
"outputs": [],
"source": [
"!docker run --gpus=all --rm -it \\\n",
" -v `pwd`/workspace:/workspace nvcr.io/nvidia/pytorch:22.07-py3 \\\n",
" -v `pwd`/workspace:/workspace nvcr.io/nvidia/pytorch:22.09-py3 \\\n",
" /bin/bash generate_model_trt.sh"
]
},
Expand Down Expand Up @@ -457,8 +457,7 @@
"container = {\n",
" \"Image\": mme_triton_image_uri,\n",
" \"ModelDataUrl\": model_data_url,\n",
" \"Mode\": \"MultiModel\",\n",
" \"Environment\": {\"SAGEMAKER_TRITON_DEFAULT_MODEL_NAME\": \"resnet\"},\n",
" \"Mode\": \"MultiModel\"\n",
"}"
]
},
Expand Down Expand Up @@ -577,7 +576,7 @@
"\n",
"Amazon SageMaker multi-model endpoints supports automatic scaling (auto scaling) for your hosted models. Auto scaling dynamically adjusts the number of instances provisioned for a model in response to changes in your workload. When the workload increases, auto scaling brings more instances online. When the workload decreases, auto scaling removes unnecessary instances so that you don't pay for provisioned instances that you aren't using.\n",
"\n",
"In the below scaling policy, use a custom metric GPUMemoryUtilization in TargetTrackingScalingPolicyConfiguration configuration and set a TargetValue of 60.0 for the target value of that metric. This autoscaling policy will provision additional instances upto MaxCapacity when GPU Utilization is more than 60%."
"In the below scaling policy, use a custom metric GPUUtilization in TargetTrackingScalingPolicyConfiguration configuration and set a TargetValue of 60.0 for the target value of that metric. This autoscaling policy will provision additional instances up to MaxCapacity when GPU Utilization is more than 60%."
]
},
{
Expand All @@ -602,16 +601,16 @@
"\n",
"# GPUUtilization metric\n",
"response = auto_scaling_client.put_scaling_policy(\n",
" PolicyName=\"GPUMemoryUtil-ScalingPolicy\",\n",
" PolicyName=\"GPUUtil-ScalingPolicy\",\n",
" ServiceNamespace=\"sagemaker\",\n",
" ResourceId=resource_id,\n",
" ScalableDimension=\"sagemaker:variant:DesiredInstanceCount\", # SageMaker supports only Instance Count\n",
" PolicyType=\"TargetTrackingScaling\", # 'StepScaling'|'TargetTrackingScaling'\n",
" TargetTrackingScalingPolicyConfiguration={\n",
" # Scale out when GPU Memory utilization hits GPUMemoryUtilization target value.\n",
" # Scale out when GPU utilization hits GPUUtilization target value.\n",
" \"TargetValue\": 60.0,\n",
" \"CustomizedMetricSpecification\": {\n",
" \"MetricName\": \"GPUMemoryUtilization\",\n",
" \"MetricName\": \"GPUUtilization\",\n",
" \"Namespace\": \"/aws/sagemaker/Endpoints\",\n",
" \"Dimensions\": [\n",
" {\"Name\": \"EndpointName\", \"Value\": endpoint_name},\n",
Expand Down Expand Up @@ -809,7 +808,7 @@
" header_length\n",
" ),\n",
" Body=request_body,\n",
" TargetModel=\"resnet_pt_v1.tar.gz\",\n",
" TargetModel=\"resnet_pt_v0.tar.gz\",\n",
")\n",
"\n",
"# Parse json header size length from the response\n",
Expand Down

0 comments on commit 455f54f

Please sign in to comment.