Skip to content

Commit

Permalink
change: Update callback step notebook as per recent sdk changes and f…
Browse files Browse the repository at this point in the history
…ix existing issues (aws#3516)

Co-authored-by: Dewen Qi <[email protected]>
Co-authored-by: Julia Kroll <[email protected]>
  • Loading branch information
3 people authored Aug 17, 2022
1 parent 16e56a6 commit 8159d5d
Show file tree
Hide file tree
Showing 5 changed files with 186 additions and 223 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -272,9 +272,9 @@
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\r\n",
"\u001b[33mWARNING: You are using pip version 21.1.3; however, version 22.1.2 is available.\r\n",
"You should consider upgrading via the '/opt/conda/bin/python -m pip install --upgrade pip' command.\u001b[0m\r\n"
"\u001B[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001B[0m\r\n",
"\u001B[33mWARNING: You are using pip version 21.1.3; however, version 22.1.2 is available.\r\n",
"You should consider upgrading via the '/opt/conda/bin/python -m pip install --upgrade pip' command.\u001B[0m\r\n"
]
}
],
Expand Down Expand Up @@ -3441,4 +3441,4 @@
},
"nbformat": 4,
"nbformat_minor": 5
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -899,7 +899,7 @@
"metadata": {},
"outputs": [],
"source": [
"!pip install \"sagemaker==2.91.1\""
"!pip install \"sagemaker>=2.99.0\""
]
},
{
Expand Down Expand Up @@ -977,6 +977,7 @@
"outputs": [],
"source": [
"from sagemaker.workflow.callback_step import CallbackStep, CallbackOutput, CallbackOutputTypeEnum\n",
"from sagemaker.workflow.functions import Join\n",
"\n",
"callback1_output = CallbackOutput(\n",
" output_name=\"s3_data_out\", output_type=CallbackOutputTypeEnum.String\n",
Expand All @@ -987,7 +988,9 @@
" sqs_queue_url=queue_url,\n",
" inputs={\n",
" \"input_location\": f\"s3://{default_bucket}/{taxi_prefix}/\",\n",
" \"output_location\": f\"s3://{default_bucket}/{taxi_prefix}_{id_out}/\",\n",
" \"output_location\": Join(\n",
" on=\"/\", values=[\"s3:/\", default_bucket, f\"{taxi_prefix}_output\", id_out]\n",
" ),\n",
" },\n",
" outputs=[callback1_output],\n",
")"
Expand All @@ -1000,9 +1003,9 @@
"source": [
"#### 2 - Training Step \n",
"\n",
"Next, we'll configure the training step by first configuring the estimator for random cut forest. Then, we'll configure the training step. \n",
"Next, we'll configure the training step by first configuring the estimator for Random Cut Forest. Then, we use the output of the estimator's `.fit()` method as the arguments to the `TrainingStep`. Because the `pipeline_session` is passed as the estimator's `sagemaker_session`, calling `.fit()` does not launch the training job. Instead, it returns the arguments needed to run the job as a step in the pipeline.\n",
"\n",
"The training step will accept the following **inputs**: \n",
"Generating the step arguments for the training step requires the following **inputs**: \n",
" * S3 location of processed data to be used for model training\n",
" * ECR containing the training image for rcf\n",
" * Estimator configuration\n",
Expand All @@ -1018,6 +1021,8 @@
"metadata": {},
"outputs": [],
"source": [
"from sagemaker.workflow.pipeline_context import PipelineSession\n",
"\n",
"containers = {\n",
" \"us-west-2\": \"174872318107.dkr.ecr.us-west-2.amazonaws.com/randomcutforest:latest\",\n",
" \"us-east-1\": \"382416733822.dkr.ecr.us-east-1.amazonaws.com/randomcutforest:latest\",\n",
Expand All @@ -1028,15 +1033,15 @@
"container = containers[region_name]\n",
"model_prefix = \"model\"\n",
"\n",
"session = sagemaker.Session()\n",
"pipeline_session = PipelineSession()\n",
"\n",
"rcf = sagemaker.estimator.Estimator(\n",
" container,\n",
" sagemaker.get_execution_role(),\n",
" output_path=\"s3://{}/{}/output\".format(default_bucket, model_prefix),\n",
" instance_count=training_instance_count,\n",
" instance_type=\"ml.c5.xlarge\",\n",
" sagemaker_session=session,\n",
" sagemaker_session=pipeline_session,\n",
")\n",
"\n",
"rcf.set_hyperparameters(num_samples_per_tree=200, num_trees=50, feature_dim=1)"
Expand All @@ -1052,9 +1057,7 @@
"from sagemaker.inputs import TrainingInput\n",
"from sagemaker.workflow.steps import TrainingStep\n",
"\n",
"step_train = TrainingStep(\n",
" name=\"TrainModel\",\n",
" estimator=rcf,\n",
"train_step_args = rcf.fit(\n",
" inputs={\n",
" \"train\": TrainingInput(\n",
" # s3_data = Output of the previous call back step\n",
Expand All @@ -1063,6 +1066,10 @@
" distribution=\"ShardedByS3Key\",\n",
" ),\n",
" },\n",
")\n",
"step_train = TrainingStep(\n",
" name=\"TrainModel\",\n",
" step_args=train_step_args,\n",
")"
]
},
Expand All @@ -1073,9 +1080,9 @@
"source": [
"#### 3 - Create Model\n",
"\n",
"Next, we'll package the trained model for deployment. \n",
"Next, we'll package the trained model for deployment. To achieve this, we define the `ModelStep` by providing the return value of `model.create()` as the step arguments. As with the training step, the `pipeline_session` is required when defining the model; it defers model creation until pipeline execution time.\n",
"\n",
"The create model step will accept the following **inputs**: \n",
"Generating the step arguments for the model step requires the following **inputs**: \n",
" * S3 location of the trained model artifact\n",
" * ECR containing the inference image for rcf\n",
" \n",
Expand All @@ -1100,7 +1107,7 @@
"model = Model(\n",
" image_uri=image_uri,\n",
" model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,\n",
" sagemaker_session=sagemaker_session,\n",
" sagemaker_session=pipeline_session,\n",
" role=role,\n",
")"
]
Expand All @@ -1112,19 +1119,14 @@
"metadata": {},
"outputs": [],
"source": [
"from sagemaker.inputs import CreateModelInput\n",
"from sagemaker.workflow.steps import CreateModelStep\n",
"from sagemaker.workflow.model_step import ModelStep\n",
"\n",
"\n",
"inputs = CreateModelInput(\n",
"model_step_args = model.create(\n",
" instance_type=\"ml.m5.large\",\n",
")\n",
"\n",
"create_model = CreateModelStep(\n",
" name=\"TaxiModel\",\n",
" model=model,\n",
" inputs=inputs,\n",
")"
"create_model = ModelStep(name=\"TaxiModel\", step_args=model_step_args)"
]
},
{
Expand All @@ -1134,9 +1136,9 @@
"source": [
"#### 4 - Batch Transform\n",
"\n",
"Next, we'll deploy the model using batch transform then do a quick evaluation with our data to compute anomaly scores for each of our data points on input. \n",
"Next, we'll deploy the model using batch transform then do a quick evaluation with our data to compute anomaly scores for each of our data points on input.\n",
"\n",
"The batch transform step will accept the following **inputs**: \n",
"Generating the step arguments for the batch transform step requires the following **inputs**: \n",
" * SageMaker packaged model\n",
" * S3 location of the input data\n",
" * ECR containing the inference image for rcf\n",
Expand Down Expand Up @@ -1164,6 +1166,7 @@
" accept=\"text/csv\",\n",
" instance_count=1,\n",
" output_path=f\"s3://{default_bucket}/{output_prefix}/\",\n",
" sagemaker_session=pipeline_session,\n",
")"
]
},
Expand All @@ -1179,17 +1182,18 @@
"\n",
"batch_data = step_callback_data.properties.Outputs[\"s3_data_out\"]\n",
"\n",
"transform_step_args = transformer.transform(\n",
" data=batch_data,\n",
" content_type=\"text/csv\",\n",
" split_type=\"Line\",\n",
" input_filter=\"$[0]\",\n",
" join_source=\"Input\",\n",
" output_filter=\"$[0,-1]\",\n",
")\n",
"\n",
"step_transform = TransformStep(\n",
" name=\"TaxiTransform\",\n",
" transformer=transformer,\n",
" inputs=TransformInput(\n",
" data=batch_data,\n",
" content_type=\"text/csv\",\n",
" split_type=\"Line\",\n",
" input_filter=\"$[0]\",\n",
" join_source=\"Input\",\n",
" output_filter=\"$[0,-1]\",\n",
" ),\n",
" step_args=transform_step_args,\n",
")"
]
},
Expand All @@ -1201,19 +1205,6 @@
"### Configure Pipeline Using Created Steps"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e646229c",
"metadata": {},
"outputs": [],
"source": [
"import uuid\n",
"\n",
"id_out = uuid.uuid4().hex\n",
"print(\"Unique ID:\", id_out)"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -1222,8 +1213,9 @@
"outputs": [],
"source": [
"from sagemaker.workflow.pipeline import Pipeline\n",
"from sagemaker.utils import unique_name_from_base\n",
"\n",
"pipeline_name = f\"GluePipeline-{id_out}\"\n",
"pipeline_name = unique_name_from_base(\"GluePipeline\")\n",
"pipeline = Pipeline(\n",
" name=pipeline_name,\n",
" parameters=[\n",
Expand Down Expand Up @@ -1318,9 +1310,9 @@
"metadata": {
"instance_type": "ml.t3.medium",
"kernelspec": {
"display_name": "Python 3 (Data Science)",
"display_name": "Python 3",
"language": "python",
"name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-2:429704687514:image/datascience-1.0"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
Expand All @@ -1332,9 +1324,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.10"
"version": "3.6.14"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
}
Loading

0 comments on commit 8159d5d

Please sign in to comment.