Skip to content

Commit

Permalink
updated sagemaker triton to v22.09
Browse files Browse the repository at this point in the history
  • Loading branch information
vikramelango committed Oct 25, 2022
1 parent 2438cff commit 455f54f
Showing 1 changed file with 10 additions and 11 deletions.
21 changes: 10 additions & 11 deletions multi-model-endpoints/mme-on-gpu/cv/resnet50_mme_with_gpu.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@
"runtime_sm_client = boto3.client(\"sagemaker-runtime\")\n",
"sagemaker_session = sagemaker.Session(boto_session=boto3.Session())\n",
"bucket = sagemaker_session.default_bucket()\n",
"prefix = \"resnet-mme-gpu\"\n",
"prefix = \"resnet-mme-gpu-v1\"\n",
"\n",
"# endpoint variables\n",
"sm_model_name = f\"{prefix}-mdl-{ts}\"\n",
Expand Down Expand Up @@ -159,7 +159,7 @@
"\n",
"base = \"amazonaws.com.cn\" if region.startswith(\"cn-\") else \"amazonaws.com\"\n",
"mme_triton_image_uri = (\n",
" \"{account_id}.dkr.ecr.{region}.{base}/sagemaker-tritonserver:22.07-py3\".format(\n",
" \"{account_id}.dkr.ecr.{region}.{base}/sagemaker-tritonserver:22.09-py3\".format(\n",
" account_id=account_id_map[region], region=region, base=base\n",
" )\n",
")"
Expand Down Expand Up @@ -200,7 +200,7 @@
"outputs": [],
"source": [
"!docker run --gpus=all --rm -it \\\n",
" -v `pwd`/workspace:/workspace nvcr.io/nvidia/pytorch:22.07-py3 \\\n",
" -v `pwd`/workspace:/workspace nvcr.io/nvidia/pytorch:22.09-py3 \\\n",
" /bin/bash generate_model_pytorch.sh"
]
},
Expand Down Expand Up @@ -289,7 +289,7 @@
"outputs": [],
"source": [
"!docker run --gpus=all --rm -it \\\n",
" -v `pwd`/workspace:/workspace nvcr.io/nvidia/pytorch:22.07-py3 \\\n",
" -v `pwd`/workspace:/workspace nvcr.io/nvidia/pytorch:22.09-py3 \\\n",
" /bin/bash generate_model_trt.sh"
]
},
Expand Down Expand Up @@ -457,8 +457,7 @@
"container = {\n",
" \"Image\": mme_triton_image_uri,\n",
" \"ModelDataUrl\": model_data_url,\n",
" \"Mode\": \"MultiModel\",\n",
" \"Environment\": {\"SAGEMAKER_TRITON_DEFAULT_MODEL_NAME\": \"resnet\"},\n",
" \"Mode\": \"MultiModel\"\n",
"}"
]
},
Expand Down Expand Up @@ -577,7 +576,7 @@
"\n",
"Amazon SageMaker multi-model endpoints supports automatic scaling (auto scaling) for your hosted models. Auto scaling dynamically adjusts the number of instances provisioned for a model in response to changes in your workload. When the workload increases, auto scaling brings more instances online. When the workload decreases, auto scaling removes unnecessary instances so that you don't pay for provisioned instances that you aren't using.\n",
"\n",
"In the below scaling policy, use a custom metric GPUMemoryUtilization in TargetTrackingScalingPolicyConfiguration configuration and set a TargetValue of 60.0 for the target value of that metric. This autoscaling policy will provision additional instances upto MaxCapacity when GPU Utilization is more than 60%."
"In the below scaling policy, use a custom metric GPUUtilization in TargetTrackingScalingPolicyConfiguration configuration and set a TargetValue of 60.0 for the target value of that metric. This autoscaling policy will provision additional instances up to MaxCapacity when GPU Utilization is more than 60%."
]
},
{
Expand All @@ -602,16 +601,16 @@
"\n",
"# GPUUtilization metric\n",
"response = auto_scaling_client.put_scaling_policy(\n",
" PolicyName=\"GPUMemoryUtil-ScalingPolicy\",\n",
" PolicyName=\"GPUUtil-ScalingPolicy\",\n",
" ServiceNamespace=\"sagemaker\",\n",
" ResourceId=resource_id,\n",
" ScalableDimension=\"sagemaker:variant:DesiredInstanceCount\", # SageMaker supports only Instance Count\n",
" PolicyType=\"TargetTrackingScaling\", # 'StepScaling'|'TargetTrackingScaling'\n",
" TargetTrackingScalingPolicyConfiguration={\n",
" # Scale out when GPU Memory utilization hits GPUMemoryUtilization target value.\n",
" # Scale out when GPU utilization hits GPUUtilization target value.\n",
" \"TargetValue\": 60.0,\n",
" \"CustomizedMetricSpecification\": {\n",
" \"MetricName\": \"GPUMemoryUtilization\",\n",
" \"MetricName\": \"GPUUtilization\",\n",
" \"Namespace\": \"/aws/sagemaker/Endpoints\",\n",
" \"Dimensions\": [\n",
" {\"Name\": \"EndpointName\", \"Value\": endpoint_name},\n",
Expand Down Expand Up @@ -809,7 +808,7 @@
" header_length\n",
" ),\n",
" Body=request_body,\n",
" TargetModel=\"resnet_pt_v1.tar.gz\",\n",
" TargetModel=\"resnet_pt_v0.tar.gz\",\n",
")\n",
"\n",
"# Parse json header size length from the response\n",
Expand Down

0 comments on commit 455f54f

Please sign in to comment.